1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declarations; the definitions appear later in this file.  */
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* Fallback stringop strategy table: use the library call for every block
   size (-1 = no upper bound).  Fills cost-table slots for which a CPU has
   no dedicated memcpy/memset strategy.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar_load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar_load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar_load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar_load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise. */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar_load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar_load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar_load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar_load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
time).  */
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar_load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
/* Latency notes for xmm-to-integer moves (comment markers restored):
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3 */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
time).  */
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar_load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar_load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Cost table for tuning for Intel Nocona (64-bit Prescott-class P4);
   entries are relative to an add.  NOTE(review): extraction has lost the
   MOVE_RATIO entry (orig. line 913), the branch-cost entry (orig. line
   939), one stringop continuation (orig. line 950) and the closing "};";
   restore from upstream i386.c before compiling.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
/* NOTE(review): MOVE_RATIO entry missing here (orig. line 913).  */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
/* NOTE(review): branch-cost entry missing here (orig. line 939).  */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* NOTE(review): stringop continuation missing here (orig. line 950).  */
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Cost table for tuning for Intel Core 2; entries are relative to an
   add.  NOTE(review): extraction has lost the MOVE_RATIO entry (orig.
   line 986) and the closing "};"; restore from upstream i386.c.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
/* NOTE(review): MOVE_RATIO entry missing here (orig. line 986).  */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of this initializer was lost in extraction;
   restore from upstream i386.c.  */
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1091 is increased to perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): closing "};" of this initializer was lost in extraction;
   restore from upstream i386.c.  Values mirror generic64_cost except the
   smaller L2 size and 32-bit-only (first-slot) stringop tables.  */
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
/* Pointer to the active cost table; reassigned elsewhere when -mtune is
   processed.  Defaults to the Pentium table.  */
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1189 /* Processor feature/optimization bitmasks.  One bit per PROCESSOR_*
   enumerator; the m_* masks below are OR-ed together in the tuning and
   architecture feature tables that follow.  */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1210 /* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* NOTE(review): many entries of this initializer lost their mask value
   lines in extraction (the descriptive comment survives but the value
   below it is gone, e.g. X86_TUNE_ZERO_EXTEND_WITH_AND, USE_MOV0,
   QIMODE_MATH, SHIFT1 and others), and the closing "};" is absent.
   Restore the full table from upstream i386.c -- the entries are
   positional, so any missing value misaligns every later X86_TUNE_*
   flag.  */
1214 /* Feature tests against the various tunings. */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_MOV0 */
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1291 /* X86_TUNE_READ_MODIFY */
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1304 /* X86_TUNE_QIMODE_MATH */
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here in between PPro/Pentium4 based chips that thread 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 shows that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376 /* X86_TUNE_SHIFT1 */
1379 /* X86_TUNE_USE_FFREEP */
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1440 m_AMDFAM10 | m_GENERIC,
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
/* NOTE(review): only the first entry of initial_ix86_arch_features kept
   its value in this extraction; the values for CMPXCHG, CMPXCHG8B, XADD
   and BSWAP and the closing "};" are missing -- restore from upstream.  */
1452 /* Feature tests against the various architecture variations. */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processor masks for two always-on-style decisions, consumed when the
   command-line options are processed.  */
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Stringop algorithm forced by -mstringop-strategy; no_stringop means
   "pick from the cost tables".  */
1481 static enum stringop_alg stringop_alg = no_stringop;
/* NOTE(review): the end of the following comment (orig. line 1485,
   including its closing delimiter) was lost in extraction.  */
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* NOTE(review): the opening "{" (orig. line 1497), several interior
   group-comment lines and the closing "};" of this array were lost in
   extraction; restore from upstream i386.c.  */
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* DWARF/DBX register numbering for 32-bit mode, indexed by gcc regno.
   NOTE(review): opening "{" and closing "};" lost in extraction.  */
1523 /* The "default" register map used in 32bit mode. */
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* gcc register numbers used for integer argument/return passing in the
   x86-64 SysV and Microsoft ABIs, and the 64-bit DWARF map.
   NOTE(review): each aggregate's opening "{" and closing "};" lines were
   lost in extraction.  */
1536 static int const x86_64_int_parameter_registers[6] =
1538 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1539 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1542 static int const x86_64_ms_abi_int_parameter_registers[4] =
1544 2 /*RCX*/, 1 /*RDX*/,
1545 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1548 static int const x86_64_int_return_registers[4] =
1550 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1553 /* The "default" register map used in 64bit mode. */
1554 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1556 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1557 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1558 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1559 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1560 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1561 8,9,10,11,12,13,14,15, /* extended integer registers */
1562 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* NOTE(review): the closing delimiter of the long explanatory comment
   below (orig. line 1618) and the array's braces were lost in
   extraction; restore from upstream i386.c.  */
1565 /* Define the register numbers to be used in Dwarf debugging information.
1566 The SVR4 reference port C compiler uses the following register numbers
1567 in its Dwarf output code:
1568 0 for %eax (gcc regno = 0)
1569 1 for %ecx (gcc regno = 2)
1570 2 for %edx (gcc regno = 1)
1571 3 for %ebx (gcc regno = 3)
1572 4 for %esp (gcc regno = 7)
1573 5 for %ebp (gcc regno = 6)
1574 6 for %esi (gcc regno = 4)
1575 7 for %edi (gcc regno = 5)
1576 The following three DWARF register numbers are never generated by
1577 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1578 believes these numbers have these meanings.
1579 8 for %eip (no gcc equivalent)
1580 9 for %eflags (gcc regno = 17)
1581 10 for %trapno (no gcc equivalent)
1582 It is not at all clear how we should number the FP stack registers
1583 for the x86 architecture. If the version of SDB on x86/svr4 were
1584 a bit less brain dead with respect to floating-point then we would
1585 have a precedent to follow with respect to DWARF register numbers
1586 for x86 FP registers, but the SDB on x86/svr4 is so completely
1587 broken with respect to FP registers that it is hardly worth thinking
1588 of it as something to strive for compatibility with.
1589 The version of x86/svr4 SDB I have at the moment does (partially)
1590 seem to believe that DWARF register number 11 is associated with
1591 the x86 register %st(0), but that's about all. Higher DWARF
1592 register numbers don't seem to be associated with anything in
1593 particular, and even for DWARF regno 11, SDB only seems to under-
1594 stand that it should say that a variable lives in %st(0) (when
1595 asked via an `=' command) if we said it was in DWARF regno 11,
1596 but SDB still prints garbage when asked for the value of the
1597 variable in question (via a `/' command).
1598 (Also note that the labels SDB prints for various FP stack regs
1599 when doing an `x' command are all wrong.)
1600 Note that these problems generally don't affect the native SVR4
1601 C compiler because it doesn't allow the use of -O with -g and
1602 because when it is *not* optimizing, it allocates a memory
1603 location for each floating-point variable, and the memory
1604 location is what gets described in the DWARF AT_location
1605 attribute for the variable in question.
1606 Regardless of the severe mental illness of the x86/svr4 SDB, we
1607 do something sensible here and we use the following DWARF
1608 register numbers. Note that these are all stack-top-relative
1610 11 for %st(0) (gcc regno = 8)
1611 12 for %st(1) (gcc regno = 9)
1612 13 for %st(2) (gcc regno = 10)
1613 14 for %st(3) (gcc regno = 11)
1614 15 for %st(4) (gcc regno = 12)
1615 16 for %st(5) (gcc regno = 13)
1616 17 for %st(6) (gcc regno = 14)
1617 18 for %st(7) (gcc regno = 15)
1619 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1621 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1622 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1623 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1624 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1625 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1626 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Communication channel between the compare expanders in i386.md and the
   branch/scc expanders: operands of the pending compare, plus the flags
   result rtx when the compare has already been emitted.  */
1630 /* Test and compare insns in i386.md store the information needed to
1631 generate branch and scc insns here. */
1633 rtx ix86_compare_op0 = NULL_RTX;
1634 rtx ix86_compare_op1 = NULL_RTX;
1635 rtx ix86_compare_emitted = NULL_RTX;
/* NOTE(review): both structure definitions below are badly truncated in
   this extraction -- stack_local_entry is missing fields and braces, the
   frame-layout diagram lost most of its lines, and struct ix86_frame's
   header, several members and closing "};" are absent.  Restore from
   upstream i386.c.  */
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1664 [va_arg registers] (
1665 > to_allocate <- FRAME_POINTER
1677 HOST_WIDE_INT frame;
1679 int outgoing_arguments_size;
1682 HOST_WIDE_INT to_allocate;
1683 /* The offsets relative to ARG_POINTER. */
1684 HOST_WIDE_INT frame_pointer_offset;
1685 HOST_WIDE_INT hard_frame_pointer_offset;
1686 HOST_WIDE_INT stack_pointer_offset;
1688 /* When save_regs_using_mov is set, emit prologue using
1689 move instead of push instructions. */
1690 bool save_regs_using_mov;
/* Backend-wide option and tuning state, filled in when the command line
   is processed (override_options and friends elsewhere in this file).  */
1693 /* Code model option. */
1694 enum cmodel ix86_cmodel;
1696 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1698 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1700 /* Which unit we are generating floating point math for. */
1701 enum fpmath_unit ix86_fpmath;
1703 /* Which cpu are we scheduling for. */
1704 enum attr_cpu ix86_schedule;
1706 /* Which cpu are we optimizing for. */
1707 enum processor_type ix86_tune;
1709 /* Which instruction set architecture to use. */
1710 enum processor_type ix86_arch;
1712 /* true if sse prefetch instruction is not NOOP. */
1713 int x86_prefetch_sse;
1715 /* ix86_regparm_string as a number */
1716 static int ix86_regparm;
1718 /* -mstackrealign option */
1719 extern int ix86_force_align_arg_pointer;
1720 static const char ix86_force_align_arg_pointer_string[]
1721 = "force_align_arg_pointer";
/* Generator-function pointers selected once per (32/64-bit) target so the
   rest of the backend can emit the right mode's pattern.  */
1723 static rtx (*ix86_gen_leave) (void);
1724 static rtx (*ix86_gen_pop1) (rtx);
1725 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1726 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1727 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1728 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1729 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1730 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1732 /* Preferred alignment for stack boundary in bits. */
1733 unsigned int ix86_preferred_stack_boundary;
1735 /* Alignment for incoming stack boundary in bits specified at
1737 static unsigned int ix86_user_incoming_stack_boundary;
1739 /* Default alignment for incoming stack boundary in bits. */
1740 static unsigned int ix86_default_incoming_stack_boundary;
1742 /* Alignment for incoming stack boundary in bits. */
1743 unsigned int ix86_incoming_stack_boundary;
1745 /* Values 1-5: see jump.c */
1746 int ix86_branch_cost;
1748 /* Calling abi specific va_list type nodes. */
1749 static GTY(()) tree sysv_va_list_type_node;
1750 static GTY(()) tree ms_va_list_type_node;
1752 /* Variables which are this size or smaller are put in the data/bss
1753 or ldata/lbss sections. */
1755 int ix86_section_threshold = 65536;
1757 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1758 char internal_label_prefix[16];
1759 int internal_label_prefix_len;
1761 /* Fence to use after loop using movnt. */
/* NOTE(review): enum body is elided here -- only a few enumerators of the
   psABI register-class enumeration are visible.  */
1764 /* Register class used for passing given 64bit part of the argument.
1765 These represent classes as documented by the PS ABI, with the exception
1766 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1767 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1769 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1770 whenever possible (upper half does contain padding). */
1771 enum x86_64_reg_class
1774 X86_64_INTEGER_CLASS,
1775 X86_64_INTEGERSI_CLASS,
1782 X86_64_COMPLEX_X87_CLASS,
/* Maximum number of eightbyte classes an argument can occupy.  */
1786 #define MAX_CLASSES 4
1788 /* Table of constants used by fldpi, fldln2, etc.... */
1789 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily initialized; set once the table above has been filled in.  */
1790 static bool ext_80387_constants_init = 0;
/* Forward declarations of static helpers defined later in this file.  */
1793 static struct machine_function * ix86_init_machine_status (void);
1794 static rtx ix86_function_value (const_tree, const_tree, bool);
1795 static int ix86_function_regparm (const_tree, const_tree);
1796 static void ix86_compute_frame_layout (struct ix86_frame *);
1797 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1799 static void ix86_add_new_builtins (int);
/* Indices for the per-function target-attribute string options
   (arch / tune / fpmath); _MAX is the array size.  */
1801 enum ix86_function_specific_strings
1803 IX86_FUNCTION_SPECIFIC_ARCH,
1804 IX86_FUNCTION_SPECIFIC_TUNE,
1805 IX86_FUNCTION_SPECIFIC_FPMATH,
1806 IX86_FUNCTION_SPECIFIC_MAX
1809 static char *ix86_target_string (int, int, const char *, const char *,
1810 const char *, bool);
1811 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1812 static void ix86_function_specific_save (struct cl_target_option *);
1813 static void ix86_function_specific_restore (struct cl_target_option *);
1814 static void ix86_function_specific_print (FILE *, int,
1815 struct cl_target_option *);
1816 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1817 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1818 static bool ix86_can_inline_p (tree, tree);
1819 static void ix86_set_current_function (tree);
1822 /* The svr4 ABI for the i386 says that records and unions are returned
1824 #ifndef DEFAULT_PCC_STRUCT_RETURN
1825 #define DEFAULT_PCC_STRUCT_RETURN 1
1828 /* Whether -mtune= or -march= were specified */
1829 static int ix86_tune_defaulted;
1830 static int ix86_arch_specified;
1832 /* Bit flags that specify the ISA we are compiling for. */
1833 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1835 /* A mask of ix86_isa_flags that includes bit X if X
1836 was set or cleared on the command line. */
1837 static int ix86_isa_flags_explicit;
1839 /* Define a set of ISAs which are available when a given ISA is
1840 enabled. MMX and SSE ISAs are handled separately. */
/* Each *_SET mask is the transitive closure of prerequisites:
   enabling an ISA also enables everything it depends on.  */
1842 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1843 #define OPTION_MASK_ISA_3DNOW_SET \
1844 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1846 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1847 #define OPTION_MASK_ISA_SSE2_SET \
1848 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1849 #define OPTION_MASK_ISA_SSE3_SET \
1850 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1851 #define OPTION_MASK_ISA_SSSE3_SET \
1852 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1853 #define OPTION_MASK_ISA_SSE4_1_SET \
1854 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1855 #define OPTION_MASK_ISA_SSE4_2_SET \
1856 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1857 #define OPTION_MASK_ISA_AVX_SET \
1858 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1859 #define OPTION_MASK_ISA_FMA_SET \
1860 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1862 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1864 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1866 #define OPTION_MASK_ISA_SSE4A_SET \
1867 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1868 #define OPTION_MASK_ISA_SSE5_SET \
1869 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1871 /* AES and PCLMUL need SSE2 because they use xmm registers */
1872 #define OPTION_MASK_ISA_AES_SET \
1873 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1874 #define OPTION_MASK_ISA_PCLMUL_SET \
1875 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT (note: the bare mask, not _SET, is used here).  */
1877 #define OPTION_MASK_ISA_ABM_SET \
1878 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1879 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1880 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1881 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1883 /* Define a set of ISAs which aren't available when a given ISA is
1884 disabled. MMX and SSE ISAs are handled separately. */
/* Each *_UNSET mask is the reverse closure: disabling an ISA also
   disables everything that depends on it.  */
1886 #define OPTION_MASK_ISA_MMX_UNSET \
1887 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1888 #define OPTION_MASK_ISA_3DNOW_UNSET \
1889 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1890 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1892 #define OPTION_MASK_ISA_SSE_UNSET \
1893 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1894 #define OPTION_MASK_ISA_SSE2_UNSET \
1895 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1896 #define OPTION_MASK_ISA_SSE3_UNSET \
1897 (OPTION_MASK_ISA_SSE3 \
1898 | OPTION_MASK_ISA_SSSE3_UNSET \
1899 | OPTION_MASK_ISA_SSE4A_UNSET )
1900 #define OPTION_MASK_ISA_SSSE3_UNSET \
1901 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1902 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1903 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1904 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1905 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1906 #define OPTION_MASK_ISA_AVX_UNSET \
1907 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1908 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1910 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1912 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1914 #define OPTION_MASK_ISA_SSE4A_UNSET \
1915 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1916 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1917 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1918 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1919 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1920 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1921 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1922 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1924 /* Vectorization library interface and handlers. */
1925 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1926 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1927 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* NOTE(review): the struct's opening line, the table's initializer braces,
   and the cpu_names initializer are elided in this listing.  */
1929 /* Processor target table, indexed by processor number */
1932 const struct processor_costs *cost; /* Processor costs */
1933 const int align_loop; /* Default alignments. */
1934 const int align_loop_max_skip;
1935 const int align_jump;
1936 const int align_jump_max_skip;
1937 const int align_func;
/* Row order must match the processor enumeration, since the table is
   indexed by processor number (see comment above).  */
1940 static const struct ptt processor_target_table[PROCESSOR_max] =
1942 {&i386_cost, 4, 3, 4, 3, 4},
1943 {&i486_cost, 16, 15, 16, 15, 16},
1944 {&pentium_cost, 16, 7, 16, 7, 16},
1945 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1946 {&geode_cost, 0, 0, 0, 0, 0},
1947 {&k6_cost, 32, 7, 32, 7, 32},
1948 {&athlon_cost, 16, 7, 16, 7, 16},
1949 {&pentium4_cost, 0, 0, 0, 0, 0},
1950 {&k8_cost, 16, 7, 16, 7, 16},
1951 {&nocona_cost, 0, 0, 0, 0, 0},
1952 {&core2_cost, 16, 10, 16, 10, 16},
1953 {&generic32_cost, 16, 7, 16, 7, 16},
1954 {&generic64_cost, 16, 10, 16, 10, 16},
1955 {&amdfam10_cost, 32, 24, 32, 7, 32}
1958 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* NOTE(review): the switch statement, its case labels, braces and return
   statements are elided from this listing; only the flag-twiddling pairs
   remain visible.  The visible pattern for every ISA option is:
     -mFOO:    ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET  (option + prereqs)
     -mno-FOO: ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET (option + dependents)
   and in both cases the same mask is recorded in ix86_isa_flags_explicit
   so later defaulting code knows the user chose explicitly.  */
1983 /* Implement TARGET_HANDLE_OPTION. */
1986 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1993 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1994 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1998 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1999 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2006 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2007 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2011 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2012 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2022 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2027 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2028 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2035 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2036 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2040 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2041 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2048 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2049 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2053 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2054 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2061 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2062 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2066 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2067 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2074 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2075 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2079 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2080 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2087 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2088 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2092 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2093 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2100 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2101 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2105 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2106 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2113 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2114 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2118 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2124 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2129 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2136 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2141 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2142 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2149 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2154 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2155 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2162 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2167 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2168 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2175 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2180 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2181 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2188 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2193 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2194 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2201 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2206 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2207 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2214 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2219 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2220 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2227 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
/* NOTE(review): this function is elided -- local declarations, braces and
   several statements are missing between the visible fragments.  */
2242 /* Return a string the documents the current -m options. The caller is
2243 responsible for freeing the string. */
2246 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2247 const char *fpmath, bool add_nl_p)
2249 struct ix86_target_opts
2251 const char *option; /* option string */
2252 int mask; /* isa mask options */
2255 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2256 preceding options while match those first. */
2257 static struct ix86_target_opts isa_opts[] =
2259 { "-m64", OPTION_MASK_ISA_64BIT },
2260 { "-msse5", OPTION_MASK_ISA_SSE5 },
2261 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2262 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2263 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2264 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2265 { "-msse3", OPTION_MASK_ISA_SSE3 },
2266 { "-msse2", OPTION_MASK_ISA_SSE2 },
2267 { "-msse", OPTION_MASK_ISA_SSE },
2268 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2269 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2270 { "-mmmx", OPTION_MASK_ISA_MMX },
2271 { "-mabm", OPTION_MASK_ISA_ABM },
2272 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2273 { "-maes", OPTION_MASK_ISA_AES },
2274 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Non-ISA target_flags bits and their option spellings.  */
2278 static struct ix86_target_opts flag_opts[] =
2280 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2281 { "-m80387", MASK_80387 },
2282 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2283 { "-malign-double", MASK_ALIGN_DOUBLE },
2284 { "-mcld", MASK_CLD },
2285 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2286 { "-mieee-fp", MASK_IEEE_FP },
2287 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2288 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2289 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2290 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2291 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2292 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2293 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2294 { "-mno-red-zone", MASK_NO_RED_ZONE },
2295 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2296 { "-mrecip", MASK_RECIP },
2297 { "-mrtd", MASK_RTD },
2298 { "-msseregparm", MASK_SSEREGPARM },
2299 { "-mstack-arg-probe", MASK_STACK_PROBE },
2300 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* Each opts[] entry is a (prefix, argument) pair; the +6 leaves room
   for arch/tune/fpmath plus the two "other" catch-alls.  */
2303 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2306 char target_other[40];
2315 memset (opts, '\0', sizeof (opts));
2317 /* Add -march= option. */
2320 opts[num][0] = "-march=";
2321 opts[num++][1] = arch;
2324 /* Add -mtune= option. */
2327 opts[num][0] = "-mtune=";
2328 opts[num++][1] = tune;
2331 /* Pick out the options in isa options. */
2332 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2334 if ((isa & isa_opts[i].mask) != 0)
2336 opts[num++][0] = isa_opts[i].option;
2337 isa &= ~ isa_opts[i].mask;
2341 if (isa && add_nl_p)
2343 opts[num++][0] = isa_other;
2344 sprintf (isa_other, "(other isa: 0x%x)", isa)
2347 /* Add flag options. */
2348 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2350 if ((flags & flag_opts[i].mask) != 0)
2352 opts[num++][0] = flag_opts[i].option;
2353 flags &= ~ flag_opts[i].mask;
2357 if (flags && add_nl_p)
2359 opts[num++][0] = target_other;
/* BUG(review): this prints ISA but labels it "other flags" -- the last
   argument should be FLAGS, mirroring the isa_other case above.
   (This was corrected in later upstream GCC.)  */
2360 sprintf (target_other, "(other flags: 0x%x)", isa);
2363 /* Add -fpmath= option. */
2366 opts[num][0] = "-mfpmath=";
2367 opts[num++][1] = fpmath;
2374 gcc_assert (num < ARRAY_SIZE (opts));
2376 /* Size the string. */
2378 sep_len = (add_nl_p) ? 3 : 1;
2379 for (i = 0; i < num; i++)
2382 for (j = 0; j < 2; j++)
2384 len += strlen (opts[i][j]);
2387 /* Build the string. */
2388 ret = ptr = (char *) xmalloc (len);
2391 for (i = 0; i < num; i++)
2395 for (j = 0; j < 2; j++)
2396 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap the output at roughly 70 columns when newlines are requested.  */
2403 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2411 for (j = 0; j < 2; j++)
2414 memcpy (ptr, opts[i][j], len2[j]);
2416 line_len += len2[j];
2421 gcc_assert (ret + len >= ptr);
/* NOTE(review): function header line, braces and the free of OPTS are
   elided from this listing.  */
2426 /* Function that is callable from the debugger to print the current
2429 ix86_debug_options (void)
2431 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2432 ix86_arch_string, ix86_tune_string,
2433 ix86_fpmath_string, true);
2437 fprintf (stderr, "%s\n\n", opts);
2441 fprintf (stderr, "<no options>\n\n");
2446 /* Sometimes certain combinations of command options do not make
2447 sense on a particular target machine. You can define a macro
2448 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2449 defined, is executed once just after all the command options have
2452 Don't use this macro to turn on various extra optimizations for
2453 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2456 override_options (bool main_args_p)
2459 unsigned int ix86_arch_mask, ix86_tune_mask;
2464 /* Comes from final.c -- no real reason to change it. */
2465 #define MAX_CODE_ALIGN 16
2473 PTA_PREFETCH_SSE = 1 << 4,
2475 PTA_3DNOW_A = 1 << 6,
2479 PTA_POPCNT = 1 << 10,
2481 PTA_SSE4A = 1 << 12,
2482 PTA_NO_SAHF = 1 << 13,
2483 PTA_SSE4_1 = 1 << 14,
2484 PTA_SSE4_2 = 1 << 15,
2487 PTA_PCLMUL = 1 << 18,
2494 const char *const name; /* processor name or nickname. */
2495 const enum processor_type processor;
2496 const enum attr_cpu schedule;
2497 const unsigned /*enum pta_flags*/ flags;
2499 const processor_alias_table[] =
2501 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2502 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2503 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2504 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2505 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2506 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2507 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2508 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2509 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2510 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2511 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2512 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2513 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2515 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2517 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2518 PTA_MMX | PTA_SSE | PTA_SSE2},
2519 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2520 PTA_MMX |PTA_SSE | PTA_SSE2},
2521 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2522 PTA_MMX | PTA_SSE | PTA_SSE2},
2523 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2524 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2525 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2526 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2527 | PTA_CX16 | PTA_NO_SAHF},
2528 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2529 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2530 | PTA_SSSE3 | PTA_CX16},
2531 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2532 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2533 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2534 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2535 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2536 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2537 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2538 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2539 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2540 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2541 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2542 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2544 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2546 {"x86-64", PROCESSOR_K8, CPU_K8,
2547 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2548 {"k8", PROCESSOR_K8, CPU_K8,
2549 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2550 | PTA_SSE2 | PTA_NO_SAHF},
2551 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2552 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2553 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2554 {"opteron", PROCESSOR_K8, CPU_K8,
2555 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2556 | PTA_SSE2 | PTA_NO_SAHF},
2557 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2558 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2559 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2560 {"athlon64", PROCESSOR_K8, CPU_K8,
2561 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2562 | PTA_SSE2 | PTA_NO_SAHF},
2563 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2564 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2565 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2566 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2567 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2568 | PTA_SSE2 | PTA_NO_SAHF},
2569 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2570 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2571 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2572 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2573 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2574 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2575 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2576 0 /* flags are only used for -march switch. */ },
2577 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2578 PTA_64BIT /* flags are only used for -march switch. */ },
2581 int const pta_size = ARRAY_SIZE (processor_alias_table);
2583 /* Set up prefix/suffix so the error messages refer to either the command
2584 line argument, or the attribute(target). */
2593 prefix = "option(\"";
2598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2599 SUBTARGET_OVERRIDE_OPTIONS;
2602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2603 SUBSUBTARGET_OVERRIDE_OPTIONS;
2606 /* -fPIC is the default for x86_64. */
2607 if (TARGET_MACHO && TARGET_64BIT)
2610 /* Set the default values for switches whose default depends on TARGET_64BIT
2611 in case they weren't overwritten by command line options. */
2614 /* Mach-O doesn't support omitting the frame pointer for now. */
2615 if (flag_omit_frame_pointer == 2)
2616 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2617 if (flag_asynchronous_unwind_tables == 2)
2618 flag_asynchronous_unwind_tables = 1;
2619 if (flag_pcc_struct_return == 2)
2620 flag_pcc_struct_return = 0;
2624 if (flag_omit_frame_pointer == 2)
2625 flag_omit_frame_pointer = 0;
2626 if (flag_asynchronous_unwind_tables == 2)
2627 flag_asynchronous_unwind_tables = 0;
2628 if (flag_pcc_struct_return == 2)
2629 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2632 /* Need to check -mtune=generic first. */
2633 if (ix86_tune_string)
2635 if (!strcmp (ix86_tune_string, "generic")
2636 || !strcmp (ix86_tune_string, "i686")
2637 /* As special support for cross compilers we read -mtune=native
2638 as -mtune=generic. With native compilers we won't see the
2639 -mtune=native, as it was changed by the driver. */
2640 || !strcmp (ix86_tune_string, "native"))
2643 ix86_tune_string = "generic64";
2645 ix86_tune_string = "generic32";
2647 /* If this call is for setting the option attribute, allow the
2648 generic32/generic64 that was previously set. */
2649 else if (!main_args_p
2650 && (!strcmp (ix86_tune_string, "generic32")
2651 || !strcmp (ix86_tune_string, "generic64")))
2653 else if (!strncmp (ix86_tune_string, "generic", 7))
2654 error ("bad value (%s) for %stune=%s %s",
2655 ix86_tune_string, prefix, suffix, sw);
2659 if (ix86_arch_string)
2660 ix86_tune_string = ix86_arch_string;
2661 if (!ix86_tune_string)
2663 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2664 ix86_tune_defaulted = 1;
2667 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2668 need to use a sensible tune option. */
2669 if (!strcmp (ix86_tune_string, "generic")
2670 || !strcmp (ix86_tune_string, "x86-64")
2671 || !strcmp (ix86_tune_string, "i686"))
2674 ix86_tune_string = "generic64";
2676 ix86_tune_string = "generic32";
2679 if (ix86_stringop_string)
2681 if (!strcmp (ix86_stringop_string, "rep_byte"))
2682 stringop_alg = rep_prefix_1_byte;
2683 else if (!strcmp (ix86_stringop_string, "libcall"))
2684 stringop_alg = libcall;
2685 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2686 stringop_alg = rep_prefix_4_byte;
2687 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2689 /* rep; movq isn't available in 32-bit code. */
2690 stringop_alg = rep_prefix_8_byte;
2691 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2692 stringop_alg = loop_1_byte;
2693 else if (!strcmp (ix86_stringop_string, "loop"))
2694 stringop_alg = loop;
2695 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2696 stringop_alg = unrolled_loop;
2698 error ("bad value (%s) for %sstringop-strategy=%s %s",
2699 ix86_stringop_string, prefix, suffix, sw);
2701 if (!strcmp (ix86_tune_string, "x86-64"))
2702 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2703 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2704 prefix, suffix, prefix, suffix, prefix, suffix);
2706 if (!ix86_arch_string)
2707 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2709 ix86_arch_specified = 1;
2711 if (!strcmp (ix86_arch_string, "generic"))
2712 error ("generic CPU can be used only for %stune=%s %s",
2713 prefix, suffix, sw);
2714 if (!strncmp (ix86_arch_string, "generic", 7))
2715 error ("bad value (%s) for %sarch=%s %s",
2716 ix86_arch_string, prefix, suffix, sw);
2718 if (ix86_cmodel_string != 0)
2720 if (!strcmp (ix86_cmodel_string, "small"))
2721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2722 else if (!strcmp (ix86_cmodel_string, "medium"))
2723 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2724 else if (!strcmp (ix86_cmodel_string, "large"))
2725 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2727 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2728 else if (!strcmp (ix86_cmodel_string, "32"))
2729 ix86_cmodel = CM_32;
2730 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2731 ix86_cmodel = CM_KERNEL;
2733 error ("bad value (%s) for %scmodel=%s %s",
2734 ix86_cmodel_string, prefix, suffix, sw);
2738 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2739 use of rip-relative addressing. This eliminates fixups that
2740 would otherwise be needed if this object is to be placed in a
2741 DLL, and is essentially just as efficient as direct addressing. */
2742 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2743 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2744 else if (TARGET_64BIT)
2745 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2747 ix86_cmodel = CM_32;
2749 if (ix86_asm_string != 0)
2752 && !strcmp (ix86_asm_string, "intel"))
2753 ix86_asm_dialect = ASM_INTEL;
2754 else if (!strcmp (ix86_asm_string, "att"))
2755 ix86_asm_dialect = ASM_ATT;
2757 error ("bad value (%s) for %sasm=%s %s",
2758 ix86_asm_string, prefix, suffix, sw);
2760 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2761 error ("code model %qs not supported in the %s bit mode",
2762 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2763 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2764 sorry ("%i-bit mode not compiled in",
2765 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2767 for (i = 0; i < pta_size; i++)
2768 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2770 ix86_schedule = processor_alias_table[i].schedule;
2771 ix86_arch = processor_alias_table[i].processor;
2772 /* Default cpu tuning to the architecture. */
2773 ix86_tune = ix86_arch;
2775 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2776 error ("CPU you selected does not support x86-64 "
2779 if (processor_alias_table[i].flags & PTA_MMX
2780 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2781 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2782 if (processor_alias_table[i].flags & PTA_3DNOW
2783 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2784 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2785 if (processor_alias_table[i].flags & PTA_3DNOW_A
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2787 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2788 if (processor_alias_table[i].flags & PTA_SSE
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2790 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2791 if (processor_alias_table[i].flags & PTA_SSE2
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2793 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2794 if (processor_alias_table[i].flags & PTA_SSE3
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2797 if (processor_alias_table[i].flags & PTA_SSSE3
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2800 if (processor_alias_table[i].flags & PTA_SSE4_1
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2803 if (processor_alias_table[i].flags & PTA_SSE4_2
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2806 if (processor_alias_table[i].flags & PTA_AVX
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2808 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2809 if (processor_alias_table[i].flags & PTA_FMA
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2811 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2812 if (processor_alias_table[i].flags & PTA_SSE4A
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2814 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2815 if (processor_alias_table[i].flags & PTA_SSE5
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2817 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2818 if (processor_alias_table[i].flags & PTA_ABM
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2820 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2821 if (processor_alias_table[i].flags & PTA_CX16
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2823 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2824 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2826 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2827 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2829 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2830 if (processor_alias_table[i].flags & PTA_AES
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2832 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2833 if (processor_alias_table[i].flags & PTA_PCLMUL
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2835 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2836 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2837 x86_prefetch_sse = true;
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 ix86_arch_mask = 1u << ix86_arch;
2847 for (i = 0; i < X86_ARCH_LAST; ++i)
2848 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2850 for (i = 0; i < pta_size; i++)
2851 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2853 ix86_schedule = processor_alias_table[i].schedule;
2854 ix86_tune = processor_alias_table[i].processor;
2855 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2857 if (ix86_tune_defaulted)
2859 ix86_tune_string = "x86-64";
2860 for (i = 0; i < pta_size; i++)
2861 if (! strcmp (ix86_tune_string,
2862 processor_alias_table[i].name))
2864 ix86_schedule = processor_alias_table[i].schedule;
2865 ix86_tune = processor_alias_table[i].processor;
2868 error ("CPU you selected does not support x86-64 "
2871 /* Intel CPUs have always interpreted SSE prefetch instructions as
2872 NOPs; so, we can enable SSE prefetch instructions even when
2873 -mtune (rather than -march) points us to a processor that has them.
2874 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2875 higher processors. */
2877 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2878 x86_prefetch_sse = true;
2882 error ("bad value (%s) for %stune=%s %s",
2883 ix86_tune_string, prefix, suffix, sw);
2885 ix86_tune_mask = 1u << ix86_tune;
2886 for (i = 0; i < X86_TUNE_LAST; ++i)
2887 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2890 ix86_cost = &ix86_size_cost;
2892 ix86_cost = processor_target_table[ix86_tune].cost;
2894 /* Arrange to set up i386_stack_locals for all functions. */
2895 init_machine_status = ix86_init_machine_status;
2897 /* Validate -mregparm= value. */
2898 if (ix86_regparm_string)
2901 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2902 i = atoi (ix86_regparm_string);
2903 if (i < 0 || i > REGPARM_MAX)
2904 error ("%sregparm=%d%s is not between 0 and %d",
2905 prefix, i, suffix, REGPARM_MAX);
2910 ix86_regparm = REGPARM_MAX;
2912 /* If the user has provided any of the -malign-* options,
2913 warn and use that value only if -falign-* is not set.
2914 Remove this code in GCC 3.2 or later. */
2915 if (ix86_align_loops_string)
2917 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2918 prefix, suffix, suffix);
2919 if (align_loops == 0)
2921 i = atoi (ix86_align_loops_string);
2922 if (i < 0 || i > MAX_CODE_ALIGN)
2923 error ("%salign-loops=%d%s is not between 0 and %d",
2924 prefix, i, suffix, MAX_CODE_ALIGN);
2926 align_loops = 1 << i;
2930 if (ix86_align_jumps_string)
2932 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2933 prefix, suffix, suffix);
2934 if (align_jumps == 0)
2936 i = atoi (ix86_align_jumps_string);
2937 if (i < 0 || i > MAX_CODE_ALIGN)
2938 error ("%salign-loops=%d%s is not between 0 and %d",
2939 prefix, i, suffix, MAX_CODE_ALIGN);
2941 align_jumps = 1 << i;
2945 if (ix86_align_funcs_string)
2947 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2948 prefix, suffix, suffix);
2949 if (align_functions == 0)
2951 i = atoi (ix86_align_funcs_string);
2952 if (i < 0 || i > MAX_CODE_ALIGN)
2953 error ("%salign-loops=%d%s is not between 0 and %d",
2954 prefix, i, suffix, MAX_CODE_ALIGN);
2956 align_functions = 1 << i;
2960 /* Default align_* from the processor table. */
2961 if (align_loops == 0)
2963 align_loops = processor_target_table[ix86_tune].align_loop;
2964 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2966 if (align_jumps == 0)
2968 align_jumps = processor_target_table[ix86_tune].align_jump;
2969 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2971 if (align_functions == 0)
2973 align_functions = processor_target_table[ix86_tune].align_func;
2976 /* Validate -mbranch-cost= value, or provide default. */
2977 ix86_branch_cost = ix86_cost->branch_cost;
2978 if (ix86_branch_cost_string)
2980 i = atoi (ix86_branch_cost_string);
2982 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2984 ix86_branch_cost = i;
2986 if (ix86_section_threshold_string)
2988 i = atoi (ix86_section_threshold_string);
2990 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2992 ix86_section_threshold = i;
2995 if (ix86_tls_dialect_string)
2997 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2998 ix86_tls_dialect = TLS_DIALECT_GNU;
2999 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3000 ix86_tls_dialect = TLS_DIALECT_GNU2;
3001 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3002 ix86_tls_dialect = TLS_DIALECT_SUN;
3004 error ("bad value (%s) for %stls-dialect=%s %s",
3005 ix86_tls_dialect_string, prefix, suffix, sw);
3008 if (ix87_precision_string)
3010 i = atoi (ix87_precision_string);
3011 if (i != 32 && i != 64 && i != 80)
3012 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3017 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3019 /* Enable by default the SSE and MMX builtins. Do allow the user to
3020 explicitly disable any of these. In particular, disabling SSE and
3021 MMX for kernel code is extremely useful. */
3022 if (!ix86_arch_specified)
3024 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3025 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3028 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3032 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3034 if (!ix86_arch_specified)
3036 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3038 /* i386 ABI does not specify red zone. It still makes sense to use it
3039 when programmer takes care to prevent the stack from being destroyed. */
3040 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3041 target_flags |= MASK_NO_RED_ZONE;
3044 /* Keep nonleaf frame pointers. */
3045 if (flag_omit_frame_pointer)
3046 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3047 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3048 flag_omit_frame_pointer = 1;
3050 /* If we're doing fast math, we don't care about comparison order
3051 wrt NaNs. This lets us use a shorter comparison sequence. */
3052 if (flag_finite_math_only)
3053 target_flags &= ~MASK_IEEE_FP;
3055 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3056 since the insns won't need emulation. */
3057 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3058 target_flags &= ~MASK_NO_FANCY_MATH_387;
3060 /* Likewise, if the target doesn't have a 387, or we've specified
3061 software floating point, don't use 387 inline intrinsics. */
3063 target_flags |= MASK_NO_FANCY_MATH_387;
3065 /* Turn on MMX builtins for -msse. */
3068 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3069 x86_prefetch_sse = true;
3072 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3073 if (TARGET_SSE4_2 || TARGET_ABM)
3074 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3076 /* Validate -mpreferred-stack-boundary= value or default it to
3077 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3078 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3079 if (ix86_preferred_stack_boundary_string)
3081 i = atoi (ix86_preferred_stack_boundary_string);
3082 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3083 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3084 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3086 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3089 /* Set the default value for -mstackrealign. */
3090 if (ix86_force_align_arg_pointer == -1)
3091 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3093 /* Validate -mincoming-stack-boundary= value or default it to
3094 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3095 if (ix86_force_align_arg_pointer)
3096 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3098 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3099 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3100 if (ix86_incoming_stack_boundary_string)
3102 i = atoi (ix86_incoming_stack_boundary_string);
3103 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3104 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3105 i, TARGET_64BIT ? 4 : 2);
3108 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3109 ix86_incoming_stack_boundary
3110 = ix86_user_incoming_stack_boundary;
3114 /* Accept -msseregparm only if at least SSE support is enabled. */
3115 if (TARGET_SSEREGPARM
3117 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3119 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3120 if (ix86_fpmath_string != 0)
3122 if (! strcmp (ix86_fpmath_string, "387"))
3123 ix86_fpmath = FPMATH_387;
3124 else if (! strcmp (ix86_fpmath_string, "sse"))
3128 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3129 ix86_fpmath = FPMATH_387;
3132 ix86_fpmath = FPMATH_SSE;
3134 else if (! strcmp (ix86_fpmath_string, "387,sse")
3135 || ! strcmp (ix86_fpmath_string, "387+sse")
3136 || ! strcmp (ix86_fpmath_string, "sse,387")
3137 || ! strcmp (ix86_fpmath_string, "sse+387")
3138 || ! strcmp (ix86_fpmath_string, "both"))
3142 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3143 ix86_fpmath = FPMATH_387;
3145 else if (!TARGET_80387)
3147 warning (0, "387 instruction set disabled, using SSE arithmetics");
3148 ix86_fpmath = FPMATH_SSE;
3151 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3154 error ("bad value (%s) for %sfpmath=%s %s",
3155 ix86_fpmath_string, prefix, suffix, sw);
3158 /* If the i387 is disabled, then do not return values in it. */
3160 target_flags &= ~MASK_FLOAT_RETURNS;
3162 /* Use external vectorized library in vectorizing intrinsics. */
3163 if (ix86_veclibabi_string)
3165 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3166 ix86_veclib_handler = ix86_veclibabi_svml;
3167 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3168 ix86_veclib_handler = ix86_veclibabi_acml;
3170 error ("unknown vectorization library ABI type (%s) for "
3171 "%sveclibabi=%s %s", ix86_veclibabi_string,
3172 prefix, suffix, sw);
3175 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3176 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3178 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3180 /* ??? Unwind info is not correct around the CFG unless either a frame
3181 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3182 unwind info generation to be aware of the CFG and propagating states
3184 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3185 || flag_exceptions || flag_non_call_exceptions)
3186 && flag_omit_frame_pointer
3187 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3189 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3190 warning (0, "unwind tables currently require either a frame pointer "
3191 "or %saccumulate-outgoing-args%s for correctness",
3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3196 /* If stack probes are required, the space used for large function
3197 arguments on the stack must also be probed, so enable
3198 -maccumulate-outgoing-args so this happens in the prologue. */
3199 if (TARGET_STACK_PROBE
3200 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3202 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3203 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3204 "for correctness", prefix, suffix);
3205 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3208 /* For sane SSE instruction set generation we need fcomi instruction.
3209 It is safe to enable all CMOVE instructions. */
3213 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3216 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3217 p = strchr (internal_label_prefix, 'X');
3218 internal_label_prefix_len = p - internal_label_prefix;
3222 /* When scheduling description is not available, disable scheduler pass
3223 so it won't slow down the compilation and make x87 code slower. */
3224 if (!TARGET_SCHEDULE)
3225 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3227 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3228 set_param_value ("simultaneous-prefetches",
3229 ix86_cost->simultaneous_prefetches);
3230 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3231 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3232 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3233 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3234 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3235 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3237 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3238 can be optimized to ap = __builtin_next_arg (0). */
3240 targetm.expand_builtin_va_start = NULL;
3244 ix86_gen_leave = gen_leave_rex64;
3245 ix86_gen_pop1 = gen_popdi1;
3246 ix86_gen_add3 = gen_adddi3;
3247 ix86_gen_sub3 = gen_subdi3;
3248 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3249 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3250 ix86_gen_monitor = gen_sse3_monitor64;
3251 ix86_gen_andsp = gen_anddi3;
3255 ix86_gen_leave = gen_leave;
3256 ix86_gen_pop1 = gen_popsi1;
3257 ix86_gen_add3 = gen_addsi3;
3258 ix86_gen_sub3 = gen_subsi3;
3259 ix86_gen_sub3_carry = gen_subsi3_carry;
3260 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3261 ix86_gen_monitor = gen_sse3_monitor;
3262 ix86_gen_andsp = gen_andsi3;
3266 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3268 target_flags |= MASK_CLD & ~target_flags_explicit;
3271 /* Save the initial options in case the user does function specific options */
3273 target_option_default_node = target_option_current_node
3274 = build_target_option_node ();
3277 /* Save the current options */
/* Save the current option state into PTR so that it can later be
   reinstated by ix86_function_specific_restore.  This backs the
   attribute((target(...)))/#pragma GCC target machinery.  */
3280 ix86_function_specific_save (struct cl_target_option *ptr)
/* The cl_target_option fields holding these values are narrow
   (byte-sized); assert that the current values actually fit before
   truncating them into the save area.  */
3282   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3283   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3284   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3285   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3286   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Snapshot every piece of state that override_options computes and
   that a target attribute may change.  */
3288   ptr->arch = ix86_arch;
3289   ptr->schedule = ix86_schedule;
3290   ptr->tune = ix86_tune;
3291   ptr->fpmath = ix86_fpmath;
3292   ptr->branch_cost = ix86_branch_cost;
3293   ptr->tune_defaulted = ix86_tune_defaulted;
3294   ptr->arch_specified = ix86_arch_specified;
3295   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3296   ptr->target_flags_explicit = target_flags_explicit;
3299 /* Restore the current options */
/* Restore option state previously captured by
   ix86_function_specific_save from PTR into the backend globals,
   recomputing the derived per-arch/per-tune feature tables only when
   the arch or tune actually changed.  */
3302 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous arch/tune so we can skip the (loop-heavy)
   feature-table rebuild when they are unchanged.  */
3304   enum processor_type old_tune = ix86_tune;
3305   enum processor_type old_arch = ix86_arch;
3306   unsigned int ix86_arch_mask, ix86_tune_mask;
3309   ix86_arch = ptr->arch;
3310   ix86_schedule = ptr->schedule;
3311   ix86_tune = ptr->tune;
3312   ix86_fpmath = ptr->fpmath;
3313   ix86_branch_cost = ptr->branch_cost;
3314   ix86_tune_defaulted = ptr->tune_defaulted;
3315   ix86_arch_specified = ptr->arch_specified;
3316   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3317   target_flags_explicit = ptr->target_flags_explicit;
3319   /* Recreate the arch feature tests if the arch changed */
3320   if (old_arch != ix86_arch)
3322       ix86_arch_mask = 1u << ix86_arch;
3323       for (i = 0; i < X86_ARCH_LAST; ++i)
/* !!(...) normalizes the masked bit to 0/1, mirroring the
   initialization done in override_options.  */
3324 	ix86_arch_features[i]
3325 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3328   /* Recreate the tune optimization tests */
3329   if (old_tune != ix86_tune)
3331       ix86_tune_mask = 1u << ix86_tune;
3332       for (i = 0; i < X86_TUNE_LAST; ++i)
3333 	ix86_tune_features[i]
3334 	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3338 /* Print the current options */
/* Print the target options held in PTR to FILE, indented by INDENT
   columns — debugging aid for the function-specific-option support.  */
3341 ix86_function_specific_print (FILE *file, int indent,
3342 			      struct cl_target_option *ptr)
/* Build a human-readable "-m..." style string from the saved ISA and
   target flags; freed below.  */
3345     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3346 			  NULL, NULL, NULL, false);
3348   fprintf (file, "%*sarch = %d (%s)\n",
/* Guard against an out-of-range index before using cpu_names; the
   fallback arm is elided from this excerpt.  */
3351 	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
3352 	    ? cpu_names[ptr->arch]
3355   fprintf (file, "%*stune = %d (%s)\n",
3358 	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
3359 	    ? cpu_names[ptr->tune]
/* fpmath is a bit-set: 387 and sse may both be enabled.  */
3362   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3363 	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3364 	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3365   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3369       fprintf (file, "%*s%s\n", indent, "", target_string);
3370       free (target_string);
3375 /* Inner function to process the attribute((target(...))), take an argument and
3376    set the current options from the argument. If we have a list, recursively go
/* Returns true on success, false (after issuing an error) on an
   unrecognized or malformed option.  P_STRINGS collects the values of
   string-valued options (arch=/tune=/fpmath=) for the caller.  */
3380 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: S is the option name, O the OPT_m* enum.
   ISA options route through ix86_handle_option; YES/NO options set or
   clear mask M in target_flags; STR options capture their argument.  */
3385 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3386 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3387 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3388 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O,  M }
3403     enum ix86_opt_type type;
/* Recognized ISA names, kept in the same spirit as the -m options.  */
3408     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
3409     IX86_ATTR_ISA ("abm",	OPT_mabm),
3410     IX86_ATTR_ISA ("aes",	OPT_maes),
3411     IX86_ATTR_ISA ("avx",	OPT_mavx),
3412     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
3413     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
3414     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
3415     IX86_ATTR_ISA ("sse",	OPT_msse),
3416     IX86_ATTR_ISA ("sse2",	OPT_msse2),
3417     IX86_ATTR_ISA ("sse3",	OPT_msse3),
3418     IX86_ATTR_ISA ("sse4",	OPT_msse4),
3419     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
3420     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
3421     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
3422     IX86_ATTR_ISA ("sse5",	OPT_msse5),
3423     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
3425     /* string options */
3426     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
3427     IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
3428     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
3431     IX86_ATTR_YES ("cld",
3435     IX86_ATTR_NO ("fancy-math-387",
3436 		  OPT_mfancy_math_387,
3437 		  MASK_NO_FANCY_MATH_387),
3439     IX86_ATTR_NO ("fused-madd",
3441 		  MASK_NO_FUSED_MADD),
3443     IX86_ATTR_YES ("ieee-fp",
3447     IX86_ATTR_YES ("inline-all-stringops",
3448 		   OPT_minline_all_stringops,
3449 		   MASK_INLINE_ALL_STRINGOPS),
3451     IX86_ATTR_YES ("inline-stringops-dynamically",
3452 		   OPT_minline_stringops_dynamically,
3453 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
3455     IX86_ATTR_NO ("align-stringops",
3456 		  OPT_mno_align_stringops,
3457 		  MASK_NO_ALIGN_STRINGOPS),
3459     IX86_ATTR_YES ("recip",
3465   /* If this is a list, recurse to get the options. */
3466   if (TREE_CODE (args) == TREE_LIST)
/* Any failing element makes the whole list invalid; the
   accumulation of the result is elided from this excerpt.  */
3470       for (; args; args = TREE_CHAIN (args))
3471 	if (TREE_VALUE (args)
3472 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3478   else if (TREE_CODE (args) != STRING_CST)
3481   /* Handle multiple arguments separated by commas.  */
/* ASTRDUP makes a stack-lifetime copy so we can carve the string up
   in place without mutating the tree's constant.  */
3482   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3484   while (next_optstr && *next_optstr != '\0')
3486       char *p = next_optstr;
3488       char *comma = strchr (next_optstr, ',');
3489       const char *opt_string;
3490       size_t len, opt_len;
3495       enum ix86_opt_type type = ix86_opt_unknown;
/* If a comma was found, restrict this pass to the text before it and
   resume after it on the next iteration.  */
3501 	  len = comma - next_optstr;
3502 	  next_optstr = comma + 1;
3510       /* Recognize no-xxx.  */
3511       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3520       /* Find the option.  */
3523       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3525 	  type = attrs[i].type;
3526 	  opt_len = attrs[i].len;
/* Cheap first-character test before the memcmp; string options only
   need to match as a prefix (they carry a value after '=').  */
3527 	  if (ch == attrs[i].string[0]
3528 	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3529 	      && memcmp (p, attrs[i].string, opt_len) == 0)
3532 	      mask = attrs[i].mask;
3533 	      opt_string = attrs[i].string;
3538       /* Process the option.  */
3541 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
3545       else if (type == ix86_opt_isa)
3546 	ix86_handle_option (opt, p, opt_set_p);
3548       else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A "no" option inverts the sense of the (possibly no- prefixed)
   request before applying the mask.  */
3550 	  if (type == ix86_opt_no)
3551 	    opt_set_p = !opt_set_p;
3554 	    target_flags |= mask;
3556 	    target_flags &= ~mask;
3559       else if (type == ix86_opt_str)
3563 	      error ("option(\"%s\") was already specified", opt_string);
/* Value text starts right after the "name=" prefix; caller frees.  */
3567 	    p_strings[opt] = xstrdup (p + opt_len);
3577 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3580 ix86_valid_target_attribute_tree (tree args)
/* Remember the command-line option strings so they can be put back
   after we temporarily override them below.  */
3582   const char *orig_arch_string = ix86_arch_string;
3583   const char *orig_tune_string = ix86_tune_string;
3584   const char *orig_fpmath_string = ix86_fpmath_string;
3585   int orig_tune_defaulted = ix86_tune_defaulted;
3586   int orig_arch_specified = ix86_arch_specified;
3587   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3590   struct cl_target_option *def
3591     = TREE_TARGET_OPTION (target_option_default_node);
3593   /* Process each of the options on the chain.  */
3594   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3597   /* If the changed options are different from the default, rerun override_options,
3598      and then save the options away.  The string options are attribute options,
3599      and will be undone when we copy the save structure.  */
3600   if (ix86_isa_flags != def->ix86_isa_flags
3601       || target_flags != def->target_flags
3602       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3603       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3604       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3606       /* If we are using the default tune= or arch=, undo the string assigned,
3607 	 and use the default.  */
3608       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3609 	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3610       else if (!orig_arch_specified)
3611 	ix86_arch_string = NULL;
3613       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3614 	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3615       else if (orig_tune_defaulted)
3616 	ix86_tune_string = NULL;
3618       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3619       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3620 	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3621       else if (!TARGET_64BIT && TARGET_SSE)
3622 	ix86_fpmath_string = "sse,387";
3624       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3625       override_options (false);
3627       /* Add any builtin functions with the new isa if any.  */
3628       ix86_add_new_builtins (ix86_isa_flags);
3630       /* Save the current options unless we are validating options for
3632       t = build_target_option_node ();
/* Put the command-line strings back; the attribute-supplied ones are
   only needed while override_options runs.  */
3634       ix86_arch_string = orig_arch_string;
3635       ix86_tune_string = orig_tune_string;
3636       ix86_fpmath_string = orig_fpmath_string;
3638       /* Free up memory allocated to hold the strings */
3639       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3640 	if (option_strings[i])
3641 	  free (option_strings[i]);
3647 /* Hook to validate attribute((target("string"))).  */
3650 ix86_valid_target_attribute_p (tree fndecl,
3651 			       tree ARG_UNUSED (name),
3653 			       int ARG_UNUSED (flags))
/* Current global target state, saved so it can be restored after we
   trial-apply the attribute's options.  */
3655   struct cl_target_option cur_target;
3657   tree old_optimize = build_optimization_node ();
3658   tree new_target, new_optimize;
3659   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3661   /* If the function changed the optimization levels as well as setting target
3662      options, start with the optimizations specified.  */
3663   if (func_optimize && func_optimize != old_optimize)
3664     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3666   /* The target attributes may also change some optimization flags, so update
3667      the optimization options if necessary.  */
3668   cl_target_option_save (&cur_target);
3669   new_target = ix86_valid_target_attribute_tree (args);
3670   new_optimize = build_optimization_node ();
/* Record the per-function option nodes on the decl; the surrounding
   success/failure test is elided from this excerpt.  */
3677       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3679       if (old_optimize != new_optimize)
3680 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the trial application — validation must not leak state into
   the global options.  */
3683   cl_target_option_restore (&cur_target);
3685   if (old_optimize != new_optimize)
3686     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3692 /* Hook to determine if one function can safely inline another.  */
3695 ix86_can_inline_p (tree caller, tree callee)
3698   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3699   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3701   /* If callee has no option attributes, then it is ok to inline.  */
3705   /* If caller has no option attributes, but callee does then it is not ok to
3707   else if (!caller_tree)
3712       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3713       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3715       /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3716 	 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3718       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3719 	  != callee_opts->ix86_isa_flags)
3722       /* See if we have the same non-isa options.  */
3723       else if (caller_opts->target_flags != callee_opts->target_flags)
3726       /* See if arch, tune, etc. are the same.  */
3727       else if (caller_opts->arch != callee_opts->arch)
3730       else if (caller_opts->tune != callee_opts->tune)
3733       else if (caller_opts->fpmath != callee_opts->fpmath)
3736       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3747 /* Remember the last target of ix86_set_current_function.  */
3748 static GTY(()) tree ix86_previous_fndecl;
3750 /* Establish appropriate back-end context for processing the function
3751    FNDECL.  The argument might be NULL to indicate processing at top
3752    level, outside of any function scope.  */
3754 ix86_set_current_function (tree fndecl)
3756   /* Only change the context if the function changes.  This hook is called
3757      several times in the course of compiling a function, and we don't want to
3758      slow things down too much or call target_reinit when it isn't safe.  */
3759   if (fndecl && fndecl != ix86_previous_fndecl)
3761       tree old_tree = (ix86_previous_fndecl
3762 		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3765       tree new_tree = (fndecl
3766 		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
/* Cache before the early-out so repeated calls for the same function
   are cheap.  */
3769       ix86_previous_fndecl = fndecl;
3770       if (old_tree == new_tree)
/* Switch to the new function's target options...  */
3775 	  cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* ...or, when the new function has none, fall back to the options
   current at top level.  */
3781 	  struct cl_target_option *def
3782 	    = TREE_TARGET_OPTION (target_option_current_node);
3784 	  cl_target_option_restore (def);
3791 /* Return true if this goes in large data/bss.  */
3794 ix86_in_large_data_p (tree exp)
/* Large-data sections only exist for the x86-64 medium code models.  */
3796   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3799   /* Functions are never large data.  */
3800   if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces the
   variable into large data regardless of its size.  */
3803   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3805       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3806       if (strcmp (section, ".ldata") == 0
3807 	  || strcmp (section, ".lbss") == 0)
3813       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3815       /* If this is an incomplete type with size 0, then we can't put it
3816 	 in data because it might be too big when completed.  */
3817       if (!size || size > ix86_section_threshold)
3824 /* Switch to the appropriate section for output of DECL.
3825    DECL is either a `VAR_DECL' node or a constant of some sort.
3826    RELOC indicates whether forming the initial value of DECL requires
3827    link-time relocations.  */
3829 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3833 x86_64_elf_select_section (tree decl, int reloc,
3834 			   unsigned HOST_WIDE_INT align)
/* For medium-model large data, redirect into the corresponding .l*
   sections; otherwise defer to the generic ELF selection below.  */
3836   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3837       && ix86_in_large_data_p (decl))
3839       const char *sname = NULL;
3840       unsigned int flags = SECTION_WRITE;
3841       switch (categorize_decl_for_section (decl, reloc))
3846 	case SECCAT_DATA_REL:
3847 	  sname = ".ldata.rel";
3849 	case SECCAT_DATA_REL_LOCAL:
3850 	  sname = ".ldata.rel.local";
3852 	case SECCAT_DATA_REL_RO:
3853 	  sname = ".ldata.rel.ro";
3855 	case SECCAT_DATA_REL_RO_LOCAL:
3856 	  sname = ".ldata.rel.ro.local";
/* BSS-category decls additionally need the SECTION_BSS flag so the
   assembler does not emit their (zero) contents.  */
3860 	  flags |= SECTION_BSS;
3863 	case SECCAT_RODATA_MERGE_STR:
3864 	case SECCAT_RODATA_MERGE_STR_INIT:
3865 	case SECCAT_RODATA_MERGE_CONST:
3869 	case SECCAT_SRODATA:
3876 	  /* We don't split these for medium model.  Place them into
3877 	     default sections and hope for best.  */
3879 	case SECCAT_EMUTLS_VAR:
3880 	case SECCAT_EMUTLS_TMPL:
3885 	  /* We might get called with string constants, but get_named_section
3886 	     doesn't like them as they are not DECLs.  Also, we need to set
3887 	     flags in that case.  */
3889 	    return get_section (sname, flags, NULL);
3890 	  return get_named_section (decl, sname, reloc);
3893   return default_elf_select_section (decl, reloc, align);
3896 /* Build up a unique section name, expressed as a
3897    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3898    RELOC indicates whether the initial value of EXP requires
3899    link-time relocations.  */
3901 static void ATTRIBUTE_UNUSED
3902 x86_64_elf_unique_section (tree decl, int reloc)
/* Only medium-model large data gets the ".l"-prefixed unique names;
   everything else uses the generic ELF scheme at the bottom.  */
3904   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3905       && ix86_in_large_data_p (decl))
3907       const char *prefix = NULL;
3908       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
3909       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3911       switch (categorize_decl_for_section (decl, reloc))
3914 	case SECCAT_DATA_REL:
3915 	case SECCAT_DATA_REL_LOCAL:
3916 	case SECCAT_DATA_REL_RO:
3917 	case SECCAT_DATA_REL_RO_LOCAL:
/* The short ".ld"/".lb"/".lr" prefixes are the linkonce spellings of
   the large-data section names.  */
3918 	  prefix = one_only ? ".ld" : ".ldata";
3921 	  prefix = one_only ? ".lb" : ".lbss";
3924 	case SECCAT_RODATA_MERGE_STR:
3925 	case SECCAT_RODATA_MERGE_STR_INIT:
3926 	case SECCAT_RODATA_MERGE_CONST:
3927 	  prefix = one_only ? ".lr" : ".lrodata";
3929 	case SECCAT_SRODATA:
3936 	  /* We don't split these for medium model.  Place them into
3937 	     default sections and hope for best.  */
3939 	case SECCAT_EMUTLS_VAR:
3940 	  prefix = targetm.emutls.var_section;
3942 	case SECCAT_EMUTLS_TMPL:
3943 	  prefix = targetm.emutls.tmpl_section;
3948 	  const char *name, *linkonce;
3951 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3952 	  name = targetm.strip_name_encoding (name);
3954 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
3955              prefix to the section name.  */
3956 	  linkonce = one_only ? ".gnu.linkonce" : "";
/* Final name: [.gnu.linkonce]<prefix>.<decl-name>.  */
3958 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3960 	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3964   default_unique_section (decl, reloc);
3967 #ifdef COMMON_ASM_OP
3968 /* This says how to output assembler code to declare an
3969    uninitialized external linkage data object.
3971    For medium model x86-64 we need to use .largecomm opcode for
3974 x86_elf_aligned_common (FILE *file,
3975 			const char *name, unsigned HOST_WIDE_INT size,
/* Objects above the -mlarge-data-threshold go to .largecomm so the
   linker can place them outside the 2GB small-data area.  */
3978   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3979       && size > (unsigned int)ix86_section_threshold)
3980     fprintf (file, ".largecomm\t");
3982     fprintf (file, "%s", COMMON_ASM_OP);
3983   assemble_name (file, name);
/* Emit "name,size,alignment-in-bytes".  */
3984   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3985 	   size, align / BITS_PER_UNIT);
3989 /* Utility function for targets to use in implementing
3990    ASM_OUTPUT_ALIGNED_BSS.  */
3993 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3994 			const char *name, unsigned HOST_WIDE_INT size,
/* Medium-model objects over the large-data threshold live in .lbss
   rather than ordinary .bss.  */
3997   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3998       && size > (unsigned int)ix86_section_threshold)
3999     switch_to_section (get_named_section (decl, ".lbss", 0));
4001     switch_to_section (bss_section);
4002   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4003 #ifdef ASM_DECLARE_OBJECT_NAME
4004   last_assemble_variable_decl = decl;
4005   ASM_DECLARE_OBJECT_NAME (file, name, decl);
4007   /* Standard thing is just output label for the object.  */
4008   ASM_OUTPUT_LABEL (file, name);
4009 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the space; a zero-sized object still gets one byte so the
   label refers to distinct storage.  */
4010   ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Implement OPTIMIZATION_OPTIONS: set i386-specific defaults for the
   given optimization LEVEL before the command line is processed.  */
4014 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4016   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
4017      make the problem with not enough registers even worse.  */
4018 #ifdef INSN_SCHEDULING
4020     flag_schedule_insns = 0;
4024   /* The Darwin libraries never set errno, so we might as well
4025      avoid calling them when that's the only reason we would.  */
4026   flag_errno_math = 0;
4028   /* The default values of these switches depend on the TARGET_64BIT
4029      that is not known at this moment.  Mark these values with 2 and
4030      let user the to override these.  In case there is no command line option
4031      specifying them, we will set the defaults in override_options.  */
4033     flag_omit_frame_pointer = 2;
4034   flag_pcc_struct_return = 2;
4035   flag_asynchronous_unwind_tables = 2;
4036   flag_vect_cost_model = 1;
4037 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4038   SUBTARGET_OPTIMIZATION_OPTIONS;
4042 /* Decide whether we can make a sibling call to a function. DECL is the
4043 declaration of the function being targeted by the call and EXP is the
4044 CALL_EXPR representing the call. */
/* NOTE(review): this excerpt elides several original lines (the early
   "return false" statements and some braces), so each guard below is
   followed by an elided bail-out in the full source.  */
4047 ix86_function_ok_for_sibcall (tree decl, tree exp)
4052 /* If we are generating position-independent code, we cannot sibcall
4053 optimize any indirect call, or a direct call to a global function,
4054 as the PLT requires %ebx be live. */
4055 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the static type of the callee from the CALL_EXPR; strip one
   level of pointer to get at the FUNCTION_TYPE.  */
4062 func = TREE_TYPE (CALL_EXPR_FN (exp));
4063 if (POINTER_TYPE_P (func))
4064 func = TREE_TYPE (func);
4067 /* Check that the return value locations are the same. Like
4068 if we are returning floats on the 80387 register stack, we cannot
4069 make a sibcall from a function that doesn't return a float to a
4070 function that does or, conversely, from a function that does return
4071 a float to a function that doesn't; the necessary stack adjustment
4072 would not be executed. This is also the place we notice
4073 differences in the return value ABI. Note that it is ok for one
4074 of the functions to have void return type as long as the return
4075 value of the other is passed in a register. */
4076 a = ix86_function_value (TREE_TYPE (exp), func, false);
4077 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4079 if (STACK_REG_P (a) || STACK_REG_P (b))
4081 if (!rtx_equal_p (a, b))
4084 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4086 else if (!rtx_equal_p (a, b))
4089 /* If this call is indirect, we'll need to be able to use a call-clobbered
4090 register for the address of the target function. Make sure that all
4091 such registers are not used for passing parameters. */
4092 if (!decl && !TARGET_64BIT)
4096 /* We're looking at the CALL_EXPR, we need the type of the function. */
4097 type = CALL_EXPR_FN (exp); /* pointer expression */
4098 type = TREE_TYPE (type); /* pointer type */
4099 type = TREE_TYPE (type); /* function type */
4101 if (ix86_function_regparm (type, NULL) >= 3)
4103 /* ??? Need to count the actual number of registers to be used,
4104 not the possible number of registers. Fix later. */
4109 /* Dllimport'd functions are also called indirectly. */
4110 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4112 && decl && DECL_DLLIMPORT_P (decl)
4113 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4116 /* If we need to align the outgoing stack, then sibcalling would
4117 unalign the stack, which may break the called function. */
4118 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4121 /* Otherwise okay. That also includes certain types of indirect calls. */
4125 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4126 calling convention attributes;
4127 arguments as in struct attribute_spec.handler. */
/* Attribute handler: validates mutually-exclusive calling-convention
   attributes, setting *NO_ADD_ATTRS on rejection.  NOTE(review): this
   excerpt elides braces, some returns, and the 64-bit-target branch that
   emits the "attribute ignored" warning below.  */
4130 ix86_handle_cconv_attribute (tree *node, tree name,
4132 int flags ATTRIBUTE_UNUSED,
4135 if (TREE_CODE (*node) != FUNCTION_TYPE
4136 && TREE_CODE (*node) != METHOD_TYPE
4137 && TREE_CODE (*node) != FIELD_DECL
4138 && TREE_CODE (*node) != TYPE_DECL)
4140 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4141 IDENTIFIER_POINTER (name));
4142 *no_add_attrs = true;
4146 /* Can combine regparm with all attributes but fastcall. */
4147 if (is_attribute_p ("regparm", name))
4151 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4153 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant and no
   larger than REGPARM_MAX.  */
4156 cst = TREE_VALUE (args);
4157 if (TREE_CODE (cst) != INTEGER_CST)
4159 warning (OPT_Wattributes,
4160 "%qs attribute requires an integer constant argument",
4161 IDENTIFIER_POINTER (name));
4162 *no_add_attrs = true;
4164 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4166 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4167 IDENTIFIER_POINTER (name), REGPARM_MAX);
4168 *no_add_attrs = true;
4176 /* Do not warn when emulating the MS ABI. */
4177 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4178 warning (OPT_Wattributes, "%qs attribute ignored",
4179 IDENTIFIER_POINTER (name));
4180 *no_add_attrs = true;
4184 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4185 if (is_attribute_p ("fastcall", name))
4187 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4189 error ("fastcall and cdecl attributes are not compatible");
4191 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4193 error ("fastcall and stdcall attributes are not compatible");
4195 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4197 error ("fastcall and regparm attributes are not compatible");
4201 /* Can combine stdcall with fastcall (redundant), regparm and
4203 else if (is_attribute_p ("stdcall", name))
4205 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4207 error ("stdcall and cdecl attributes are not compatible");
4209 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4211 error ("stdcall and fastcall attributes are not compatible");
4215 /* Can combine cdecl with regparm and sseregparm. */
4216 else if (is_attribute_p ("cdecl", name))
4218 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4220 error ("stdcall and cdecl attributes are not compatible");
4222 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4224 error ("fastcall and cdecl attributes are not compatible");
4228 /* Can combine sseregparm with all attributes. */
4233 /* Return 0 if the attributes for two types are incompatible, 1 if they
4234 are compatible, and 2 if they are nearly compatible (which causes a
4235 warning to be generated). */
/* NOTE(review): the elided lines return the documented 0/1 results after
   each mismatch check below.  */
4238 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4240 /* Check for mismatch of non-default calling convention. */
4241 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4243 if (TREE_CODE (type1) != FUNCTION_TYPE
4244 && TREE_CODE (type1) != METHOD_TYPE)
4247 /* Check for mismatched fastcall/regparm types. */
4248 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4249 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4250 || (ix86_function_regparm (type1, NULL)
4251 != ix86_function_regparm (type2, NULL)))
4254 /* Check for mismatched sseregparm types. */
4255 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4256 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4259 /* Check for mismatched return types (cdecl vs stdcall). */
4260 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4261 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4267 /* Return the regparm value for a function with the indicated TYPE and DECL.
4268 DECL may be NULL when calling function indirectly
4269 or considering a libcall. */
/* NOTE(review): excerpt elides braces and some guard lines; the local
   cgraph info 'i' gates the local-function optimization below.  */
4272 ix86_function_regparm (const_tree type, const_tree decl)
4275 int regparm = ix86_regparm;
4277 static bool error_issued;
4281 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4283 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* An explicit regparm attribute overrides the command-line default.  */
4286 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4290 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4292 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4294 /* We can't use regparm(3) for nested functions because
4295 these pass static chain pointer in %ecx register. */
4296 if (!error_issued && regparm == 3
4297 && decl_function_context (decl)
4298 && !DECL_NO_STATIC_CHAIN (decl))
4300 error ("nested functions are limited to 2 register parameters");
4301 error_issued = true;
4309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4312 /* Use register calling convention for local functions when possible. */
4313 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4316 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4317 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4320 int local_regparm, globals = 0, regno;
4323 /* Make sure no regparm register is taken by a
4324 fixed register variable. */
4325 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4326 if (fixed_regs[local_regparm])
4329 /* We can't use regparm(3) for nested functions as these use
4330 static chain pointer in third argument. */
4331 if (local_regparm == 3
4332 && decl_function_context (decl)
4333 && !DECL_NO_STATIC_CHAIN (decl))
4336 /* If the function realigns its stack pointer, the prologue will
4337 clobber %ecx. If we've already generated code for the callee,
4338 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4339 scanning the attributes for the self-realigning property. */
4340 f = DECL_STRUCT_FUNCTION (decl);
4341 /* Since current internal arg pointer won't conflict with
4342 parameter passing regs, so no need to change stack
4343 realignment and adjust regparm number.
4345 Each fixed register usage increases register pressure,
4346 so fewer registers should be used for argument passing.
4347 This functionality can be overridden by an explicit
4349 for (regno = 0; regno <= DI_REG; regno++)
4350 if (fixed_regs[regno])
4354 = globals < local_regparm ? local_regparm - globals : 0;
4356 if (local_regparm > regparm)
4357 regparm = local_regparm;
4364 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4365 DFmode (2) arguments in SSE registers for a function with the
4366 indicated TYPE and DECL. DECL may be NULL when calling function
4367 indirectly or considering a libcall. Otherwise return 0. */
/* Only meaningful for 32-bit targets (asserted below); WARN selects
   between error and warning diagnostics in the full source.  */
4370 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4372 gcc_assert (!TARGET_64BIT);
4374 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4375 by the sseregparm attribute. */
4376 if (TARGET_SSEREGPARM
4377 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support cannot be honored; diagnose it.  */
4384 error ("Calling %qD with attribute sseregparm without "
4385 "SSE/SSE2 enabled", decl);
4387 error ("Calling %qT with attribute sseregparm without "
4388 "SSE/SSE2 enabled", type);
4396 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4397 (and DFmode for SSE2) arguments in SSE registers. */
4398 if (decl && TARGET_SSE_MATH && !profile_flag)
4400 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4401 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4403 return TARGET_SSE2 ? 2 : 1;
4409 /* Return true if EAX is live at the start of the function. Used by
4410 ix86_expand_prologue to determine if we need special help before
4411 calling allocate_stack_worker. */
4414 ix86_eax_live_at_start_p (void)
4416 /* Cheat. Don't bother working forward from ix86_function_regparm
4417 to the function type to whether an actual argument is located in
4418 eax. Instead just look at cfg info, which is still close enough
4419 to correct at this point. This gives false positives for broken
4420 functions that might use uninitialized data that happens to be
4421 allocated in eax, but who cares? */
/* Regno 0 is AX; test its membership in the entry block's live-out set.  */
4422 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4425 /* Value is the number of bytes of arguments automatically
4426 popped when returning from a subroutine call.
4427 FUNDECL is the declaration node of the function (as a tree),
4428 FUNTYPE is the data type of the function (as a tree),
4429 or for a library call it is an identifier node for the subroutine name.
4430 SIZE is the number of bytes of arguments passed on the stack.
4432 On the 80386, the RTD insn may be used to pop them if the number
4433 of args is fixed, but if the number is variable then the caller
4434 must pop them all. RTD can't be used for library calls now
4435 because the library is compiled with the Unix compiler.
4436 Use of RTD is a selectable option, since it is incompatible with
4437 standard Unix calling sequences. If the option is not selected,
4438 the caller must always pop the args.
4440 The attribute stdcall is equivalent to RTD on a per module basis. */
4443 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4447 /* None of the 64-bit ABIs pop arguments. */
/* RTD only applies to real function decls, never library-call
   identifier nodes.  */
4451 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4453 /* Cdecl functions override -mrtd, and never pop the stack. */
4454 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4456 /* Stdcall and fastcall functions will pop the stack if not
4458 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4459 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4462 if (rtd && ! stdarg_p (funtype))
4466 /* Lose any fake structure return argument if it is passed on the stack. */
4467 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4468 && !KEEP_AGGREGATE_RETURN_POINTER)
4470 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops only the hidden aggregate-return pointer word.  */
4472 return GET_MODE_SIZE (Pmode);
4478 /* Argument support functions. */
4480 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): excerpt elides the TARGET_64BIT branch conditions that
   select between the register-set checks below.  */
4482 ix86_function_arg_regno_p (int regno)
4485 const int *parm_regs;
4490 return (regno < REGPARM_MAX
4491 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4493 return (regno < REGPARM_MAX
4494 || (TARGET_MMX && MMX_REGNO_P (regno)
4495 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4496 || (TARGET_SSE && SSE_REGNO_P (regno)
4497 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4502 if (SSE_REGNO_P (regno) && TARGET_SSE)
4507 if (TARGET_SSE && SSE_REGNO_P (regno)
4508 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4512 /* TODO: The function should depend on current function ABI but
4513 builtins.c would need updating then. Therefore we use the
4516 /* RAX is used as hidden argument to va_arg functions. */
4517 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* Pick the integer-parameter register table for the default ABI and
   scan it for REGNO.  */
4520 if (DEFAULT_ABI == MS_ABI)
4521 parm_regs = x86_64_ms_abi_int_parameter_registers;
4523 parm_regs = x86_64_int_parameter_registers;
4524 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4525 : X86_64_REGPARM_MAX); i++)
4526 if (regno == parm_regs[i])
4531 /* Return if we do not know how to pass TYPE solely in registers. */
/* Implements TARGET_MUST_PASS_IN_STACK for ix86; true means TYPE must
   live on the stack.  */
4534 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4536 if (must_pass_in_stack_var_size_or_pad (mode, type))
4539 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4540 The layout_type routine is crafty and tries to trick us into passing
4541 currently unsupported vector types on the stack by using TImode. */
4542 return (!TARGET_64BIT && mode == TImode
4543 && type && TREE_CODE (type) != VECTOR_TYPE);
4546 /* It returns the size, in bytes, of the area reserved for arguments passed
4547 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or (per the else branch) a type node;
   MS ABI reserves a home area, SYSV reserves none (elided returns).  */
4550 ix86_reg_parm_stack_space (const_tree fndecl)
4552 int call_abi = SYSV_ABI;
4553 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4554 call_abi = ix86_function_abi (fndecl);
4556 call_abi = ix86_function_type_abi (fndecl);
4557 if (call_abi == MS_ABI)
4562 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The ms_abi/sysv_abi attributes can flip a 64-bit function away from
   DEFAULT_ABI; 32-bit targets (elided path) have no such distinction.  */
4565 ix86_function_type_abi (const_tree fntype)
4567 if (TARGET_64BIT && fntype != NULL)
4570 if (DEFAULT_ABI == SYSV_ABI)
4571 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4573 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function FNDECL,
   delegating to the type-based query on its FUNCTION_TYPE.  */
4581 ix86_function_abi (const_tree fndecl)
4585 return ix86_function_type_abi (TREE_TYPE (fndecl));
4588 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a function context, or on 32-bit targets, the elided branch
   returns the default ABI instead of the per-function one.  */
4591 ix86_cfun_abi (void)
4593 if (! cfun || ! TARGET_64BIT)
4595 return cfun->machine->call_abi;
4599 extern void init_regs (void);
4601 /* Implementation of call abi switching target hook. Specific to FNDECL
4602 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4603 for more details. */
/* Record in cfun->machine which calling ABI the current function body
   uses; NULL FNDECL means "use the default ABI".  */
4605 ix86_call_abi_override (const_tree fndecl)
4607 if (fndecl == NULL_TREE)
4608 cfun->machine->call_abi = DEFAULT_ABI;
4610 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4613 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4614 re-initialization of init_regs each time we switch function context since
4615 this is needed only during RTL expansion. */
4617 ix86_maybe_switch_abi (void)
/* Re-run init_regs only when the cached call_used_regs state (probed via
   RSI's call-clobbered flag) disagrees with the current function's ABI.
   NOTE(review): surrounding condition lines are elided in this excerpt.  */
4620 call_used_regs[4 /*RSI*/] == (cfun->machine->call_abi == MS_ABI))
4624 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4625 for a call to a function whose data type is FNTYPE.
4626 For a library call, FNTYPE is 0. */
4624 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4625 for a call to a function whose data type is FNTYPE.
4626 For a library call, FNTYPE is 0. */
4629 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4630 tree fntype, /* tree ptr for function decl */
4631 rtx libname, /* SYMBOL_REF of library name or 0 */
4634 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4635 memset (cum, 0, sizeof (*cum));
4638 cum->call_abi = ix86_function_abi (fndecl);
4640 cum->call_abi = ix86_function_type_abi (fntype);
4641 /* Set up the number of registers to use for passing arguments. */
4643 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4644 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4645 cum->nregs = ix86_regparm;
4648 if (cum->call_abi != DEFAULT_ABI)
4649 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4654 cum->sse_nregs = SSE_REGPARM_MAX;
4657 if (cum->call_abi != DEFAULT_ABI)
4658 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4659 : X64_SSE_REGPARM_MAX;
4663 cum->mmx_nregs = MMX_REGPARM_MAX;
4664 cum->warn_avx = true;
4665 cum->warn_sse = true;
4666 cum->warn_mmx = true;
4668 /* Because type might mismatch in between caller and callee, we need to
4669 use actual type of function for local calls.
4670 FIXME: cgraph_analyze can be told to actually record if function uses
4671 va_start so for local functions maybe_vaarg can be made aggressive
4673 FIXME: once typesytem is fixed, we won't need this code anymore. */
4675 fntype = TREE_TYPE (fndecl);
4676 cum->maybe_vaarg = (fntype
4677 ? (!prototype_p (fntype) || stdarg_p (fntype))
4682 /* If there are variable arguments, then we won't pass anything
4683 in registers in 32-bit mode. */
4684 if (stdarg_p (fntype))
4695 /* Use ecx and edx registers if function has fastcall attribute,
4696 else look for regparm information. */
4699 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4705 cum->nregs = ix86_function_regparm (fntype, fndecl);
4708 /* Set up the number of SSE registers used for passing SFmode
4709 and DFmode arguments. Warn for mismatching ABI. */
4710 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4714 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4715 But in the case of vector types, it is some vector mode.
4717 When we have only some of our vector isa extensions enabled, then there
4718 are some modes for which vector_mode_supported_p is false. For these
4719 modes, the generic vector support in gcc will choose some non-vector mode
4720 in order to implement the type. By computing the natural mode, we'll
4721 select the proper ABI location for the operand and not depend on whatever
4722 the middle-end decides to do with these vector types.
4724 The middle-end can't deal with the vector types > 16 bytes. In this
4725 case, we return the original mode and warn ABI change if CUM isn't
4728 static enum machine_mode
4729 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4731 enum machine_mode mode = TYPE_MODE (type);
4733 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4735 HOST_WIDE_INT size = int_size_in_bytes (type);
4736 if ((size == 8 || size == 16 || size == 32)
4737 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4738 && TYPE_VECTOR_SUBPARTS (type) > 1)
4740 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4742 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4743 mode = MIN_MODE_VECTOR_FLOAT;
4745 mode = MIN_MODE_VECTOR_INT;
4747 /* Get the mode which has this inner mode and number of units. */
4748 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4749 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4750 && GET_MODE_INNER (mode) == innermode)
/* 32-byte (AVX) vectors without AVX enabled change the ABI; warn once.  */
4752 if (size == 32 && !TARGET_AVX)
4754 static bool warnedavx;
4761 warning (0, "AVX vector argument without AVX "
4762 "enabled changes the ABI");
4764 return TYPE_MODE (type);
4777 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4778 this may not agree with the mode that the type system has chosen for the
4779 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4780 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4783 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4788 if (orig_mode != BLKmode)
4789 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-entry PARALLEL at
   offset zero.  */
4792 tmp = gen_rtx_REG (mode, regno);
4793 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4794 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4800 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4801 of this code is to classify each 8bytes of incoming argument by the register
4802 class and assign registers accordingly. */
4804 /* Return the union class of CLASS1 and CLASS2.
4805 See the x86-64 PS ABI for details. */
/* Implements the psABI's class-merging rules in order; each rule number
   below matches the ABI document.  */
4807 static enum x86_64_reg_class
4808 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4810 /* Rule #1: If both classes are equal, this is the resulting class. */
4811 if (class1 == class2)
4814 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4816 if (class1 == X86_64_NO_CLASS)
4818 if (class2 == X86_64_NO_CLASS)
4821 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4822 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4823 return X86_64_MEMORY_CLASS;
4825 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4826 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4827 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4828 return X86_64_INTEGERSI_CLASS;
4829 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4830 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4831 return X86_64_INTEGER_CLASS;
4833 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4835 if (class1 == X86_64_X87_CLASS
4836 || class1 == X86_64_X87UP_CLASS
4837 || class1 == X86_64_COMPLEX_X87_CLASS
4838 || class2 == X86_64_X87_CLASS
4839 || class2 == X86_64_X87UP_CLASS
4840 || class2 == X86_64_COMPLEX_X87_CLASS)
4841 return X86_64_MEMORY_CLASS;
4843 /* Rule #6: Otherwise class SSE is used. */
4844 return X86_64_SSE_CLASS;
4847 /* Classify the argument of type TYPE and mode MODE.
4848 CLASSES will be filled by the register class used to pass each word
4849 of the operand. The number of words is returned. In case the parameter
4850 should be passed in memory, 0 is returned. As a special case for zero
4851 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4853 BIT_OFFSET is used internally for handling records and specifies offset
4854 of the offset in bits modulo 256 to avoid overflow cases.
4856 See the x86-64 PS ABI for details.
/* NOTE(review): this excerpt elides many lines (returns, braces, case
   labels); the structure below follows the psABI classification
   algorithm: aggregates are classified field-by-field via recursion and
   merge_classes, then scalar modes are classified directly.  */
4860 classify_argument (enum machine_mode mode, const_tree type,
4861 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4863 HOST_WIDE_INT bytes =
4864 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4865 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4867 /* Variable sized entities are always passed/returned in memory. */
4871 if (mode != VOIDmode
4872 && targetm.calls.must_pass_in_stack (mode, type))
4875 if (type && AGGREGATE_TYPE_P (type))
4879 enum x86_64_reg_class subclasses[MAX_CLASSES];
4881 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4885 for (i = 0; i < words; i++)
4886 classes[i] = X86_64_NO_CLASS;
4888 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4889 signal memory class, so handle it as special case. */
4892 classes[0] = X86_64_NO_CLASS;
4896 /* Classify each field of record and merge classes. */
4897 switch (TREE_CODE (type))
4900 /* And now merge the fields of structure. */
4901 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4903 if (TREE_CODE (field) == FIELD_DECL)
4907 if (TREE_TYPE (field) == error_mark_node)
4910 /* Bitfields are always classified as integer. Handle them
4911 early, since later code would consider them to be
4912 misaligned integers. */
4913 if (DECL_BIT_FIELD (field))
4915 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4916 i < ((int_bit_position (field) + (bit_offset % 64))
4917 + tree_low_cst (DECL_SIZE (field), 0)
4920 merge_classes (X86_64_INTEGER_CLASS,
4925 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4926 TREE_TYPE (field), subclasses,
4927 (int_bit_position (field)
4928 + bit_offset) % 256);
4931 for (i = 0; i < num; i++)
4934 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4936 merge_classes (subclasses[i], classes[i + pos]);
4944 /* Arrays are handled as small records. */
4947 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4948 TREE_TYPE (type), subclasses, bit_offset);
4952 /* The partial classes are now full classes. */
4953 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4954 subclasses[0] = X86_64_SSE_CLASS;
4955 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4956 && !((bit_offset % 64) == 0 && bytes == 4))
4957 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all array words.  */
4959 for (i = 0; i < words; i++)
4960 classes[i] = subclasses[i % num];
4965 case QUAL_UNION_TYPE:
4966 /* Unions are similar to RECORD_TYPE but offset is always 0.
4968 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4970 if (TREE_CODE (field) == FIELD_DECL)
4974 if (TREE_TYPE (field) == error_mark_node)
4977 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4978 TREE_TYPE (field), subclasses,
4982 for (i = 0; i < num; i++)
4983 classes[i] = merge_classes (subclasses[i], classes[i]);
4994 /* When size > 16 bytes, if the first one isn't
4995 X86_64_SSE_CLASS or any other ones aren't
4996 X86_64_SSEUP_CLASS, everything should be passed in
4998 if (classes[0] != X86_64_SSE_CLASS)
5001 for (i = 1; i < words; i++)
5002 if (classes[i] != X86_64_SSEUP_CLASS)
5006 /* Final merger cleanup. */
5007 for (i = 0; i < words; i++)
5009 /* If one class is MEMORY, everything should be passed in
5011 if (classes[i] == X86_64_MEMORY_CLASS)
5014 /* The X86_64_SSEUP_CLASS should be always preceded by
5015 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5016 if (classes[i] == X86_64_SSEUP_CLASS
5017 && classes[i - 1] != X86_64_SSE_CLASS
5018 && classes[i - 1] != X86_64_SSEUP_CLASS)
5020 /* The first one should never be X86_64_SSEUP_CLASS. */
5021 gcc_assert (i != 0);
5022 classes[i] = X86_64_SSE_CLASS;
5025 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5026 everything should be passed in memory. */
5027 if (classes[i] == X86_64_X87UP_CLASS
5028 && (classes[i - 1] != X86_64_X87_CLASS))
5032 /* The first one should never be X86_64_X87UP_CLASS. */
5033 gcc_assert (i != 0);
5034 if (!warned && warn_psabi)
5037 inform (input_location,
5038 "The ABI of passing union with long double"
5039 " has changed in GCC 4.4");
5047 /* Compute alignment needed. We align all types to natural boundaries with
5048 exception of XFmode that is aligned to 64bits. */
5049 if (mode != VOIDmode && mode != BLKmode)
5051 int mode_alignment = GET_MODE_BITSIZE (mode);
5054 mode_alignment = 128;
5055 else if (mode == XCmode)
5056 mode_alignment = 256;
5057 if (COMPLEX_MODE_P (mode))
5058 mode_alignment /= 2;
5059 /* Misaligned fields are always returned in memory. */
5060 if (bit_offset % mode_alignment)
5064 /* for V1xx modes, just use the base mode */
5065 if (VECTOR_MODE_P (mode) && mode != V1DImode
5066 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5067 mode = GET_MODE_INNER (mode);
5069 /* Classification of atomic types. */
5074 classes[0] = X86_64_SSE_CLASS;
5077 classes[0] = X86_64_SSE_CLASS;
5078 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: classify by the bit span [bit_offset, bit_offset+size)
   into one or two INTEGER/INTEGERSI eightbytes.  */
5088 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5092 classes[0] = X86_64_INTEGERSI_CLASS;
5095 else if (size <= 64)
5097 classes[0] = X86_64_INTEGER_CLASS;
5100 else if (size <= 64+32)
5102 classes[0] = X86_64_INTEGER_CLASS;
5103 classes[1] = X86_64_INTEGERSI_CLASS;
5106 else if (size <= 64+64)
5108 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5116 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5120 /* OImode shouldn't be used directly. */
5125 if (!(bit_offset % 64))
5126 classes[0] = X86_64_SSESF_CLASS;
5128 classes[0] = X86_64_SSE_CLASS;
5131 classes[0] = X86_64_SSEDF_CLASS;
5134 classes[0] = X86_64_X87_CLASS;
5135 classes[1] = X86_64_X87UP_CLASS;
5138 classes[0] = X86_64_SSE_CLASS;
5139 classes[1] = X86_64_SSEUP_CLASS;
5142 classes[0] = X86_64_SSE_CLASS;
5145 classes[0] = X86_64_SSEDF_CLASS;
5146 classes[1] = X86_64_SSEDF_CLASS;
5149 classes[0] = X86_64_COMPLEX_X87_CLASS;
5152 /* This modes is larger than 16 bytes. */
/* 256-bit AVX vector modes occupy one SSE plus three SSEUP eightbytes.  */
5160 classes[0] = X86_64_SSE_CLASS;
5161 classes[1] = X86_64_SSEUP_CLASS;
5162 classes[2] = X86_64_SSEUP_CLASS;
5163 classes[3] = X86_64_SSEUP_CLASS;
5171 classes[0] = X86_64_SSE_CLASS;
5172 classes[1] = X86_64_SSEUP_CLASS;
5179 classes[0] = X86_64_SSE_CLASS;
5185 gcc_assert (VECTOR_MODE_P (mode));
5190 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5192 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5193 classes[0] = X86_64_INTEGERSI_CLASS;
5195 classes[0] = X86_64_INTEGER_CLASS;
5196 classes[1] = X86_64_INTEGER_CLASS;
5197 return 1 + (bytes > 8);
5201 /* Examine the argument and return set number of register required in each
5202 class. Return 0 iff parameter should be passed in memory. */
/* On success sets *INT_NREGS and *SSE_NREGS (increments elided in this
   excerpt); IN_RETURN permits x87 classes, which are return-only.  */
5204 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5205 int *int_nregs, int *sse_nregs)
5207 enum x86_64_reg_class regclass[MAX_CLASSES];
5208 int n = classify_argument (mode, type, regclass, 0);
5214 for (n--; n >= 0; n--)
5215 switch (regclass[n])
5217 case X86_64_INTEGER_CLASS:
5218 case X86_64_INTEGERSI_CLASS:
5221 case X86_64_SSE_CLASS:
5222 case X86_64_SSESF_CLASS:
5223 case X86_64_SSEDF_CLASS:
5226 case X86_64_NO_CLASS:
5227 case X86_64_SSEUP_CLASS:
5229 case X86_64_X87_CLASS:
5230 case X86_64_X87UP_CLASS:
5234 case X86_64_COMPLEX_X87_CLASS:
5235 return in_return ? 2 : 0;
5236 case X86_64_MEMORY_CLASS:
5242 /* Construct container for the argument used by GCC interface. See
5243 FUNCTION_ARG for the detailed description. */
/* Build the RTL description of where a value of TYPE with MODE/ORIG_MODE
   lives under the x86-64 ABI: a single hard REG, a PARALLEL of
   EXPR_LIST register pieces, or memory.  IN_RETURN is nonzero when
   classifying a return value; NINTREGS/NSSEREGS are the registers still
   available; INTREG points at the integer register numbers to use and
   SSE_REGNO is the first free SSE register.
   NOTE(review): many original lines are elided in this excerpt.  */
5246 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5247 const_tree type, int in_return, int nintregs, int nsseregs,
5248 const int *intreg, int sse_regno)
5250 /* The following variables hold the static issued_error state. */
5251 static bool issued_sse_arg_error;
5252 static bool issued_sse_ret_error;
5253 static bool issued_x87_ret_error;
5255 enum machine_mode tmpmode;
5257 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5258 enum x86_64_reg_class regclass[MAX_CLASSES];
5262 int needed_sseregs, needed_intregs;
5263 rtx exp[MAX_CLASSES];
/* Classify the value into per-eightbyte register classes.  */
5266 n = classify_argument (mode, type, regclass, 0);
5269 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the needed kinds: use memory.  */
5272 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5275 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5276 some less clueful developer tries to use floating-point anyway. */
5277 if (needed_sseregs && !TARGET_SSE)
5281 if (!issued_sse_ret_error)
5283 error ("SSE register return with SSE disabled");
5284 issued_sse_ret_error = true;
5287 else if (!issued_sse_arg_error)
5289 error ("SSE register argument with SSE disabled");
5290 issued_sse_arg_error = true;
5295 /* Likewise, error if the ABI requires us to return values in the
5296 x87 registers and the user specified -mno-80387. */
5297 if (!TARGET_80387 && in_return)
5298 for (i = 0; i < n; i++)
5299 if (regclass[i] == X86_64_X87_CLASS
5300 || regclass[i] == X86_64_X87UP_CLASS
5301 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5303 if (!issued_x87_ret_error)
5305 error ("x87 register return with x87 disabled");
5306 issued_x87_ret_error = true;
5311 /* First construct simple cases. Avoid SCmode, since we want to use
5312 single register to pass this type. */
5313 if (n == 1 && mode != SCmode)
5314 switch (regclass[0])
5316 case X86_64_INTEGER_CLASS:
5317 case X86_64_INTEGERSI_CLASS:
5318 return gen_rtx_REG (mode, intreg[0]);
5319 case X86_64_SSE_CLASS:
5320 case X86_64_SSESF_CLASS:
5321 case X86_64_SSEDF_CLASS:
5322 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5323 case X86_64_X87_CLASS:
5324 case X86_64_COMPLEX_X87_CLASS:
5325 return gen_rtx_REG (mode, FIRST_STACK_REG);
5326 case X86_64_NO_CLASS:
5327 /* Zero sized array, struct or class. */
/* Multi-eightbyte SSE values occupy a single vector register.  */
5332 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5333 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5334 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5336 && regclass[0] == X86_64_SSE_CLASS
5337 && regclass[1] == X86_64_SSEUP_CLASS
5338 && regclass[2] == X86_64_SSEUP_CLASS
5339 && regclass[3] == X86_64_SSEUP_CLASS
5341 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5344 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5345 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5346 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5347 && regclass[1] == X86_64_INTEGER_CLASS
5348 && (mode == CDImode || mode == TImode || mode == TFmode)
5349 && intreg[0] + 1 == intreg[1])
5350 return gen_rtx_REG (mode, intreg[0]);
5352 /* Otherwise figure out the entries of the PARALLEL. */
5353 for (i = 0; i < n; i++)
5357 switch (regclass[i])
5359 case X86_64_NO_CLASS:
5361 case X86_64_INTEGER_CLASS:
5362 case X86_64_INTEGERSI_CLASS:
5363 /* Merge TImodes on aligned occasions here too. */
5364 if (i * 8 + 8 > bytes)
5365 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5366 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5370 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5371 if (tmpmode == BLKmode)
5373 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5374 gen_rtx_REG (tmpmode, *intreg),
5378 case X86_64_SSESF_CLASS:
5379 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5380 gen_rtx_REG (SFmode,
5381 SSE_REGNO (sse_regno)),
5385 case X86_64_SSEDF_CLASS:
5386 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5387 gen_rtx_REG (DFmode,
5388 SSE_REGNO (sse_regno)),
5392 case X86_64_SSE_CLASS:
5400 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5410 && regclass[1] == X86_64_SSEUP_CLASS
5411 && regclass[2] == X86_64_SSEUP_CLASS
5412 && regclass[3] == X86_64_SSEUP_CLASS);
5419 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5420 gen_rtx_REG (tmpmode,
5421 SSE_REGNO (sse_regno)),
5430 /* Empty aligned struct, union or class. */
/* Wrap the collected register pieces into the final PARALLEL.  */
5434 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5435 for (i = 0; i < nexps; i++)
5436 XVECEXP (ret, 0, i) = exp [i];
5440 /* Update the data in CUM to advance over an argument of mode MODE
5441 and data type TYPE. (TYPE is null for libcalls where that information
5442 may not be available.) */
/* 32-bit variant: account for one argument in the integer, SSE, or MMX
   register files, updating the matching counters in CUM.  */
5445 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5446 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register bookkeeping: consume WORDS registers.  */
5462 cum->words += words;
5463 cum->nregs -= words;
5464 cum->regno += words;
/* Integer registers exhausted.  */
5466 if (cum->nregs <= 0)
5474 /* OImode shouldn't be used directly. */
5478 if (cum->float_in_sse < 2)
5481 if (cum->float_in_sse < 1)
/* SSE vector arguments: only non-aggregates use SSE registers here.  */
5498 if (!type || !AGGREGATE_TYPE_P (type))
5500 cum->sse_words += words;
5501 cum->sse_nregs -= 1;
5502 cum->sse_regno += 1;
5503 if (cum->sse_nregs <= 0)
/* MMX vector arguments: likewise restricted to non-aggregates.  */
5516 if (!type || !AGGREGATE_TYPE_P (type))
5518 cum->mmx_words += words;
5519 cum->mmx_nregs -= 1;
5520 cum->mmx_regno += 1;
5521 if (cum->mmx_nregs <= 0)
/* 64-bit SysV variant: classify the argument and consume the integer
   and SSE registers it needs; otherwise advance the stack word count.  */
5532 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5533 tree type, HOST_WIDE_INT words, int named)
5535 int int_nregs, sse_nregs;
5537 /* Unnamed 256bit vector mode parameters are passed on stack. */
5538 if (!named && VALID_AVX256_REG_MODE (mode))
/* Memory-class argument: only the stack word count advances.  */
5541 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5542 cum->words += words;
5543 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5545 cum->nregs -= int_nregs;
5546 cum->sse_nregs -= sse_nregs;
5547 cum->regno += int_nregs;
5548 cum->sse_regno += sse_nregs;
/* Not enough registers left: the argument goes on the stack.  */
5551 cum->words += words;
/* Win64 variant: each register-passed argument must be 1, 2, 4, or 8
   bytes; anything else is passed indirectly, hence the assertion.  */
5555 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5556 HOST_WIDE_INT words)
5558 /* Otherwise, this should be passed indirect. */
5559 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5561 cum->words += words;
/* Dispatcher: advance CUM past one argument, selecting the Win64,
   SysV 64-bit, or 32-bit bookkeeping routine by target and call ABI.  */
5570 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5571 tree type, int named)
5573 HOST_WIDE_INT bytes, words;
/* BLKmode has no fixed size; measure the type instead.  */
5575 if (mode == BLKmode)
5576 bytes = int_size_in_bytes (type);
5578 bytes = GET_MODE_SIZE (mode);
5579 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5582 mode = type_natural_mode (type, NULL);
/* A null CUM falls back to the default ABI.  */
5584 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5585 function_arg_advance_ms_64 (cum, bytes, words);
5586 else if (TARGET_64BIT)
5587 function_arg_advance_64 (cum, mode, type, words, named);
5589 function_arg_advance_32 (cum, mode, type, bytes, words);
5592 /* Define where to put the arguments to a function.
5593 Value is zero to push the argument on the stack,
5594 or a hard register in which to store the argument.
5596 MODE is the argument's machine mode.
5597 TYPE is the data type of the argument (as a tree).
5598 This is null for libcalls where that information may
5600 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5601 the preceding args and about the function being called.
5602 NAMED is nonzero if this argument is a named parameter
5603 (otherwise it is an extra parameter matching an ellipsis). */
/* 32-bit variant: choose an integer, SSE, or MMX register, or fall
   through to the stack.  Warns at most once per compilation when a
   vector argument is used with the matching ISA disabled.  */
5606 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5607 enum machine_mode orig_mode, tree type,
5608 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5610 static bool warnedsse, warnedmmx;
5612 /* Avoid the AL settings for the Unix64 ABI. */
5613 if (mode == VOIDmode)
/* Argument fits entirely in the remaining integer registers.  */
5629 if (words <= cum->nregs)
5631 int regno = cum->regno;
5633 /* Fastcall allocates the first two DWORD (SImode) or
5634 smaller arguments to ECX and EDX if it isn't an
5640 || (type && AGGREGATE_TYPE_P (type)))
5643 /* ECX not EAX is the first allocated register. */
5644 if (regno == AX_REG)
5647 return gen_rtx_REG (mode, regno);
5652 if (cum->float_in_sse < 2)
5655 if (cum->float_in_sse < 1)
5659 /* In 32bit, we pass TImode in xmm registers. */
5666 if (!type || !AGGREGATE_TYPE_P (type))
5668 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5671 warning (0, "SSE vector argument without SSE enabled "
5675 return gen_reg_or_parallel (mode, orig_mode,
5676 cum->sse_regno + FIRST_SSE_REG);
5681 /* OImode shouldn't be used directly. */
5690 if (!type || !AGGREGATE_TYPE_P (type))
5693 return gen_reg_or_parallel (mode, orig_mode,
5694 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector arguments go in MMX registers, with a one-time warning
   when MMX is disabled.  */
5703 if (!type || !AGGREGATE_TYPE_P (type))
5705 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5708 warning (0, "MMX vector argument without MMX enabled "
5712 return gen_reg_or_parallel (mode, orig_mode,
5713 cum->mmx_regno + FIRST_MMX_REG);
/* 64-bit SysV variant.  A VOIDmode "argument" is the hidden value in AL
   giving the number of SSE registers used by a varargs call.  */
5722 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5723 enum machine_mode orig_mode, tree type, int named)
5725 /* Handle a hidden AL argument containing number of registers
5726 for varargs x86-64 functions. */
5727 if (mode == VOIDmode)
5728 return GEN_INT (cum->maybe_vaarg
5729 ? (cum->sse_nregs < 0
5730 ? (cum->call_abi == DEFAULT_ABI
5732 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5733 : X64_SSE_REGPARM_MAX))
5748 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate to construct_container for the REG/PARALLEL description.  */
5754 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5756 &x86_64_int_parameter_registers [cum->regno],
/* Win64 variant: arguments occupy slot-indexed integer or SSE
   registers; once registers run out, everything goes on the stack.  */
5761 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5762 enum machine_mode orig_mode, int named,
5763 HOST_WIDE_INT bytes)
5767 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5768 We use value of -2 to specify that current function call is MSABI. */
5769 if (mode == VOIDmode)
5770 return GEN_INT (-2);
5772 /* If we've run out of registers, it goes on the stack. */
5773 if (cum->nregs == 0)
5776 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5778 /* Only floating point modes are passed in anything but integer regs. */
5779 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5782 regno = cum->regno + FIRST_SSE_REG;
5787 /* Unnamed floating parameters are passed in both the
5788 SSE and integer registers. */
5789 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5790 t2 = gen_rtx_REG (mode, regno);
5791 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5792 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5793 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5796 /* Handle aggregated types passed in register. */
5797 if (orig_mode == BLKmode)
/* Small aggregates are passed as the integer mode of matching size.  */
5799 if (bytes > 0 && bytes <= 8)
5800 mode = (bytes > 4 ? DImode : SImode);
5801 if (mode == BLKmode)
5805 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Dispatcher for FUNCTION_ARG: compute the argument's size in bytes and
   words, then hand off to the Win64, SysV 64-bit, or 32-bit variant.  */
5809 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5810 tree type, int named)
5812 enum machine_mode mode = omode;
5813 HOST_WIDE_INT bytes, words;
5815 if (mode == BLKmode)
5816 bytes = int_size_in_bytes (type);
5818 bytes = GET_MODE_SIZE (mode);
5819 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5821 /* To simplify the code below, represent vector types with a vector mode
5822 even if MMX/SSE are not active. */
5823 if (type && TREE_CODE (type) == VECTOR_TYPE)
5824 mode = type_natural_mode (type, cum);
5826 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5827 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5828 else if (TARGET_64BIT)
5829 return function_arg_64 (cum, mode, omode, type, named);
5831 return function_arg_32 (cum, mode, omode, type, bytes, words);
5834 /* A C expression that indicates when an argument must be passed by
5835 reference. If nonzero for an argument, a copy of that argument is
5836 made in memory and a pointer to the argument is passed instead of
5837 the argument itself. The pointer is passed in whatever way is
5838 appropriate for passing a pointer to that type. */
5841 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5842 enum machine_mode mode ATTRIBUTE_UNUSED,
5843 const_tree type, bool named ATTRIBUTE_UNUSED)
5845 /* See Windows x64 Software Convention. */
5846 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5848 int msize = (int) GET_MODE_SIZE (mode);
5851 /* Arrays are passed by reference. */
5852 if (TREE_CODE (type) == ARRAY_TYPE)
5855 if (AGGREGATE_TYPE_P (type))
5857 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5858 are passed by reference. */
5859 msize = int_size_in_bytes (type);
5863 /* __m128 is passed by reference. */
5865 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size -1) go by reference.  */
5871 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5877 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Walks aggregate members recursively looking for 128-bit-aligned
   values (e.g. SSE modes) that force the stricter boundary.  */
5880 contains_aligned_value_p (tree type)
5882 enum machine_mode mode = TYPE_MODE (type);
5883 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5887 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5889 if (TYPE_ALIGN (type) < 128)
5892 if (AGGREGATE_TYPE_P (type))
5894 /* Walk the aggregates recursively. */
5895 switch (TREE_CODE (type))
5899 case QUAL_UNION_TYPE:
5903 /* Walk all the structure fields. */
5904 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5906 if (TREE_CODE (field) == FIELD_DECL
5907 && contains_aligned_value_p (TREE_TYPE (field)))
5914 /* Just for use if some language passes arrays by value. */
5915 if (contains_aligned_value_p (TREE_TYPE (type)))
5926 /* Gives the alignment boundary, in bits, of an argument with the
5927 specified mode and type. */
5930 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5935 /* Since canonical type is used for call, we convert it to
5936 canonical type if needed. */
5937 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5938 type = TYPE_CANONICAL (type);
5939 align = TYPE_ALIGN (type);
5942 align = GET_MODE_ALIGNMENT (mode);
/* Never align an argument to less than the ABI parameter boundary.  */
5943 if (align < PARM_BOUNDARY)
5944 align = PARM_BOUNDARY;
5945 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5946 natural boundaries. */
5947 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5949 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5950 make an exception for SSE modes since these require 128bit
5953 The handling here differs from field_alignment. ICC aligns MMX
5954 arguments to 4 byte boundaries, while structure fields are aligned
5955 to 8 byte boundaries. */
5958 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5959 align = PARM_BOUNDARY;
5963 if (!contains_aligned_value_p (type))
5964 align = PARM_BOUNDARY;
/* Clamp to the target's maximum supported alignment.  */
5967 if (align > BIGGEST_ALIGNMENT)
5968 align = BIGGEST_ALIGNMENT;
5972 /* Return true if N is a possible register number of function value. */
5975 ix86_function_value_regno_p (int regno)
5982 case FIRST_FLOAT_REG:
5983 /* TODO: The function should depend on current function ABI but
5984 builtins.c would need updating then. Therefore we use the
/* Win64 never returns values in x87 registers.  */
5986 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5988 return TARGET_FLOAT_RETURNS_IN_80387;
5994 if (TARGET_MACHO || TARGET_64BIT)
6002 /* Define how to find the value returned by a function.
6003 VALTYPE is the data type of the value (as a tree).
6004 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6005 otherwise, FUNC is 0. */
/* 32-bit variant: pick the hard register holding the return value
   (MMX, SSE, x87, or the default integer register).  */
6008 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6009 const_tree fntype, const_tree fn)
6013 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6014 we normally prevent this case when mmx is not available. However
6015 some ABIs may require the result to be returned like DImode. */
6016 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6017 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6019 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6020 we prevent this case when sse is not available. However some ABIs
6021 may require the result to be returned like integer TImode. */
6022 else if (mode == TImode
6023 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6024 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6026 /* 32-byte vector modes in %ymm0. */
6027 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6028 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6030 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6031 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6032 regno = FIRST_FLOAT_REG;
6034 /* Most things go in %eax. */
6037 /* Override FP return register with %xmm0 for local functions when
6038 SSE math is enabled or for functions with sseregparm attribute. */
6039 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6041 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6042 if ((sse_level >= 1 && mode == SFmode)
6043 || (sse_level == 2 && mode == DFmode))
6044 regno = FIRST_SSE_REG;
6047 /* OImode shouldn't be used directly. */
6048 gcc_assert (mode != OImode);
6050 return gen_rtx_REG (orig_mode, regno);
/* 64-bit SysV variant: return values are described by
   construct_container using the integer return registers; libcalls
   (no VALTYPE) get direct register selection.  */
6054 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6059 /* Handle libcalls, which don't provide a type node. */
6060 if (valtype == NULL)
6072 return gen_rtx_REG (mode, FIRST_SSE_REG);
6075 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6079 return gen_rtx_REG (mode, AX_REG);
6083 ret = construct_container (mode, orig_mode, valtype, 1,
6084 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6085 x86_64_int_return_registers, 0);
6087 /* For zero sized structures, construct_container returns NULL, but we
6088 need to keep rest of compiler happy by returning meaningful value. */
6090 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Win64 variant: values come back in %rax by default, or %xmm0 for
   16-byte non-complex scalars/vectors and for SFmode/DFmode.  */
6096 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6098 unsigned int regno = AX_REG;
6102 switch (GET_MODE_SIZE (mode))
6105 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6106 && !COMPLEX_MODE_P (mode))
6107 regno = FIRST_SSE_REG;
6111 if (mode == SFmode || mode == DFmode)
6112 regno = FIRST_SSE_REG;
6118 return gen_rtx_REG (orig_mode, regno);
/* Shared worker: resolve the function type from FNTYPE_OR_DECL (a
   FUNCTION_DECL or a type) and dispatch to the ABI-specific
   return-value routine.  */
6122 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6123 enum machine_mode orig_mode, enum machine_mode mode)
6125 const_tree fn, fntype;
6128 if (fntype_or_decl && DECL_P (fntype_or_decl))
6129 fn = fntype_or_decl;
6130 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6132 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6133 return function_value_ms_64 (orig_mode, mode);
6134 else if (TARGET_64BIT)
6135 return function_value_64 (orig_mode, mode, valtype);
6137 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute the natural mode of VALTYPE and
   delegate to ix86_function_value_1.  */
6141 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6142 bool outgoing ATTRIBUTE_UNUSED)
6144 enum machine_mode mode, orig_mode;
6146 orig_mode = TYPE_MODE (valtype);
6147 mode = type_natural_mode (valtype, NULL);
6148 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return-value location for a libcall: no type node is available, so
   both modes are MODE and the type arguments are NULL.  */
6152 ix86_libcall_value (enum machine_mode mode)
6154 return ix86_function_value_1 (NULL, NULL, mode, mode);
6157 /* Return true iff type is returned in memory. */
6159 static int ATTRIBUTE_UNUSED
6160 return_in_memory_32 (const_tree type, enum machine_mode mode)
6164 if (mode == BLKmode)
6167 size = int_size_in_bytes (type);
/* Some 32-bit targets return small aggregates in registers.  */
6169 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6172 if (VECTOR_MODE_P (mode) || mode == TImode)
6174 /* User-created vectors small enough to fit in EAX. */
6178 /* MMX/3dNow values are returned in MM0,
6179 except when it doesn't exist. */
6181 return (TARGET_MMX ? 0 : 1);
6183 /* SSE values are returned in XMM0, except when it doesn't exist. */
6185 return (TARGET_SSE ? 0 : 1);
6187 /* AVX values are returned in YMM0, except when it doesn't exist. */
6189 return TARGET_AVX ? 0 : 1;
6198 /* OImode shouldn't be used directly. */
6199 gcc_assert (mode != OImode);
/* 64-bit SysV: a value goes in memory exactly when examine_argument
   cannot place it in registers.  */
6204 static int ATTRIBUTE_UNUSED
6205 return_in_memory_64 (const_tree type, enum machine_mode mode)
6207 int needed_intregs, needed_sseregs;
6208 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Win64: 16-byte non-complex scalars/vectors come back in %xmm0;
   otherwise only sizes 1, 2, 4, and 8 fit in registers.  */
6211 static int ATTRIBUTE_UNUSED
6212 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6214 HOST_WIDE_INT size = int_size_in_bytes (type);
6216 /* __m128 is returned in xmm0. */
6217 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6218 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6221 /* Otherwise, the size must be exactly in [1248]. */
6222 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the subtarget override or
   the per-ABI helper for TYPE's natural mode.  */
6226 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6228 #ifdef SUBTARGET_RETURN_IN_MEMORY
6229 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6231 const enum machine_mode mode = type_natural_mode (type, NULL);
6235 if (ix86_function_type_abi (fntype) == MS_ABI)
6236 return return_in_memory_ms_64 (type, mode);
6238 return return_in_memory_64 (type, mode);
6241 return return_in_memory_32 (type, mode);
6245 /* Return false iff TYPE is returned in memory. This version is used
6246 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6247 but differs notably in that when MMX is available, 8-byte vectors
6248 are returned in memory, rather than in MMX registers. */
6251 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6254 enum machine_mode mode = type_natural_mode (type, NULL);
6257 return return_in_memory_64 (type, mode);
6259 if (mode == BLKmode)
6262 size = int_size_in_bytes (type);
6264 if (VECTOR_MODE_P (mode))
6266 /* Return in memory only if MMX registers *are* available. This
6267 seems backwards, but it is consistent with the existing
6274 else if (mode == TImode)
6276 else if (mode == XFmode)
6282 /* When returning SSE vector types, we have a choice of either
6283 (1) being abi incompatible with a -march switch, or
6284 (2) generating an error.
6285 Given no good solution, I think the safest thing is one warning.
6286 The user won't be able to use -Werror, but....
6288 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6289 called in response to actually generating a caller or callee that
6290 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6291 via aggregate_value_p for general type probing from tree-ssa. */
6294 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Warn at most once per compilation for each of SSE and MMX.  */
6296 static bool warnedsse, warnedmmx;
6298 if (!TARGET_64BIT && type)
6300 /* Look at the return type of the function, not the function type. */
6301 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6303 if (!TARGET_SSE && !warnedsse)
6306 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6309 warning (0, "SSE vector return without SSE enabled "
6314 if (!TARGET_MMX && !warnedmmx)
6316 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6319 warning (0, "MMX vector return without MMX enabled "
6329 /* Create the va_list data type. */
6331 /* Returns the calling convention specific va_list data type.
6332 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6335 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6337 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6339 /* For i386 we use plain pointer to argument area. */
6340 if (!TARGET_64BIT || abi == MS_ABI)
6341 return build_pointer_type (char_type_node);
/* SysV x86-64: build the four-field __va_list_tag record.  */
6343 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6344 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6346 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6347 unsigned_type_node);
6348 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6349 unsigned_type_node);
6350 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6352 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6355 va_list_gpr_counter_field = f_gpr;
6356 va_list_fpr_counter_field = f_fpr;
6358 DECL_FIELD_CONTEXT (f_gpr) = record;
6359 DECL_FIELD_CONTEXT (f_fpr) = record;
6360 DECL_FIELD_CONTEXT (f_ovf) = record;
6361 DECL_FIELD_CONTEXT (f_sav) = record;
6363 TREE_CHAIN (record) = type_decl;
6364 TYPE_NAME (record) = type_decl;
6365 TYPE_FIELDS (record) = f_gpr;
6366 TREE_CHAIN (f_gpr) = f_fpr;
6367 TREE_CHAIN (f_fpr) = f_ovf;
6368 TREE_CHAIN (f_ovf) = f_sav;
6370 layout_type (record);
6372 /* The correct type is an array type of one element. */
6373 return build_array_type (record, build_index_type (size_zero_node));
6376 /* Setup the builtin va_list data type and for 64-bit the additional
6377 calling convention specific va_list data types. */
6380 ix86_build_builtin_va_list (void)
6382 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6384 /* Initialize abi specific va_list builtin types. */
/* Build the variant for whichever ABI is NOT the default, so both
   sysv_va_list_type_node and ms_va_list_type_node are populated.  */
6388 if (DEFAULT_ABI == MS_ABI)
6390 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6391 if (TREE_CODE (t) != RECORD_TYPE)
6392 t = build_variant_type_copy (t);
6393 sysv_va_list_type_node = t;
6398 if (TREE_CODE (t) != RECORD_TYPE)
6399 t = build_variant_type_copy (t);
6400 sysv_va_list_type_node = t;
6402 if (DEFAULT_ABI != MS_ABI)
6404 t = ix86_build_builtin_va_list_abi (MS_ABI);
6405 if (TREE_CODE (t) != RECORD_TYPE)
6406 t = build_variant_type_copy (t);
6407 ms_va_list_type_node = t;
6412 if (TREE_CODE (t) != RECORD_TYPE)
6413 t = build_variant_type_copy (t);
6414 ms_va_list_type_node = t;
6421 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: spill the unnamed-argument GPRs into the register save
   area, then emit the computed-jump SSE prologue save sequence.  */
6424 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6433 int regparm = ix86_regparm;
6435 if (cum->call_abi != DEFAULT_ABI)
6436 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6438 /* GPR size of varargs save area. */
6439 if (cfun->va_list_gpr_size)
6440 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6442 ix86_varargs_gpr_size = 0;
6444 /* FPR size of varargs save area. We don't need it if we don't pass
6445 anything in SSE registers. */
6446 if (cum->sse_nregs && cfun->va_list_fpr_size)
6447 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6449 ix86_varargs_fpr_size = 0;
/* Nothing to save at all.  */
6451 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6454 save_area = frame_pointer_rtx;
6455 set = get_varargs_alias_set ();
/* Save each remaining integer parameter register into its slot.  */
6457 for (i = cum->regno;
6459 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6462 mem = gen_rtx_MEM (Pmode,
6463 plus_constant (save_area, i * UNITS_PER_WORD));
6464 MEM_NOTRAP_P (mem) = 1;
6465 set_mem_alias_set (mem, set);
6466 emit_move_insn (mem, gen_rtx_REG (Pmode,
6467 x86_64_int_parameter_registers[i]));
6470 if (ix86_varargs_fpr_size)
6472 /* Now emit code to save SSE registers. The AX parameter contains number
6473 of SSE parameter registers used to call this function. We use
6474 sse_prologue_save insn template that produces computed jump across
6475 SSE saves. We need some preparation work to get this working. */
6477 label = gen_label_rtx ();
6478 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6480 /* Compute address to jump to :
6481 label - eax*4 + nnamed_sse_arguments*4 Or
6482 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6483 tmp_reg = gen_reg_rtx (Pmode);
6484 nsse_reg = gen_reg_rtx (Pmode);
6485 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6486 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6487 gen_rtx_MULT (Pmode, nsse_reg,
6490 /* vmovaps is one byte longer than movaps. */
6492 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6493 gen_rtx_PLUS (Pmode, tmp_reg,
6499 gen_rtx_CONST (DImode,
6500 gen_rtx_PLUS (DImode,
6502 GEN_INT (cum->sse_regno
6503 * (TARGET_AVX ? 5 : 4)))));
6505 emit_move_insn (nsse_reg, label_ref);
6506 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6508 /* Compute address of memory block we save into. We always use pointer
6509 pointing 127 bytes after first byte to store - this is needed to keep
6510 instruction size limited by 4 bytes (5 bytes for AVX) with one
6511 byte displacement. */
6512 tmp_reg = gen_reg_rtx (Pmode);
6513 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6514 plus_constant (save_area,
6515 ix86_varargs_gpr_size + 127)));
6516 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6517 MEM_NOTRAP_P (mem) = 1;
6518 set_mem_alias_set (mem, set);
6519 set_mem_align (mem, BITS_PER_WORD);
6521 /* And finally do the dirty job! */
6522 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6523 GEN_INT (cum->sse_regno), label));
/* Win64 variant: spill the remaining integer parameter registers into
   the caller-allocated home slots above the incoming args pointer.  */
6528 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6530 alias_set_type set = get_varargs_alias_set ();
6533 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6537 mem = gen_rtx_MEM (Pmode,
6538 plus_constant (virtual_incoming_args_rtx,
6539 i * UNITS_PER_WORD));
6540 MEM_NOTRAP_P (mem) = 1;
6541 set_mem_alias_set (mem, set);
6543 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6544 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: skip the last named argument for
   stdarg functions, then dispatch to the per-ABI worker.  */
6549 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6550 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6553 CUMULATIVE_ARGS next_cum;
6556 /* This argument doesn't appear to be used anymore. Which is good,
6557 because the old code here didn't suppress rtl generation. */
6558 gcc_assert (!no_rtl);
6563 fntype = TREE_TYPE (current_function_decl);
6565 /* For varargs, we do not want to skip the dummy va_dcl argument.
6566 For stdargs, we do want to skip the last named argument. */
6568 if (stdarg_p (fntype))
6569 function_arg_advance (&next_cum, mode, type, 1);
6571 if (cum->call_abi == MS_ABI)
6572 setup_incoming_varargs_ms_64 (&next_cum);
6574 setup_incoming_varargs_64 (&next_cum);
6577 /* Checks if TYPE is of kind va_list char *. */
6580 is_va_list_char_pointer (tree type)
6584 /* For 32-bit it is always true. */
/* 64-bit: the "char *" form is used by the MS ABI va_list.  */
6587 canonic = ix86_canonical_va_list_type (type);
6588 return (canonic == ms_va_list_type_node
6589 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6592 /* Implement va_start. */
/* Initializes the SysV x86-64 va_list record: gp_offset, fp_offset,
   overflow_arg_area, and reg_save_area.  32-bit and MS-ABI char*
   va_lists fall back to the standard expander.  */
6595 ix86_va_start (tree valist, rtx nextarg)
6597 HOST_WIDE_INT words, n_gpr, n_fpr;
6598 tree f_gpr, f_fpr, f_ovf, f_sav;
6599 tree gpr, fpr, ovf, sav, t;
6602 /* Only 64bit target needs something special. */
6603 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6605 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four fields out of the __va_list_tag record.  */
6609 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6610 f_fpr = TREE_CHAIN (f_gpr);
6611 f_ovf = TREE_CHAIN (f_fpr);
6612 f_sav = TREE_CHAIN (f_ovf);
6614 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6615 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6616 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6617 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6618 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6620 /* Count number of gp and fp argument registers used. */
6621 words = crtl->args.info.words;
6622 n_gpr = crtl->args.info.regno;
6623 n_fpr = crtl->args.info.sse_regno;
6625 if (cfun->va_list_gpr_size)
6627 type = TREE_TYPE (gpr);
6628 t = build2 (MODIFY_EXPR, type,
6629 gpr, build_int_cst (type, n_gpr * 8));
6630 TREE_SIDE_EFFECTS (t) = 1;
6631 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6634 if (TARGET_SSE && cfun->va_list_fpr_size)
6636 type = TREE_TYPE (fpr);
/* fp_offset starts after the 8-byte GPR slots in the save area.  */
6637 t = build2 (MODIFY_EXPR, type, fpr,
6638 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6639 TREE_SIDE_EFFECTS (t) = 1;
6640 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6643 /* Find the overflow area. */
6644 type = TREE_TYPE (ovf);
6645 t = make_tree (type, crtl->args.internal_arg_pointer);
6647 t = build2 (POINTER_PLUS_EXPR, type, t,
6648 size_int (words * UNITS_PER_WORD));
6649 t = build2 (MODIFY_EXPR, type, ovf, t);
6650 TREE_SIDE_EFFECTS (t) = 1;
6651 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6653 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6655 /* Find the register save area.
6656 Prologue of the function save it right above stack frame. */
6657 type = TREE_TYPE (sav);
6658 t = make_tree (type, frame_pointer_rtx);
6659 if (!ix86_varargs_gpr_size)
6660 t = build2 (POINTER_PLUS_EXPR, type, t,
6661 size_int (-8 * X86_64_REGPARM_MAX));
6662 t = build2 (MODIFY_EXPR, type, sav, t);
6663 TREE_SIDE_EFFECTS (t) = 1;
6664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6668 /* Implement va_arg. */
6671 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6674 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6675 tree f_gpr, f_fpr, f_ovf, f_sav;
6676 tree gpr, fpr, ovf, sav, t;
6678 tree lab_false, lab_over = NULL_TREE;
6683 enum machine_mode nat_mode;
6686 /* Only 64bit target needs something special. */
6687 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6688 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6690 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6691 f_fpr = TREE_CHAIN (f_gpr);
6692 f_ovf = TREE_CHAIN (f_fpr);
6693 f_sav = TREE_CHAIN (f_ovf);
6695 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6696 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6697 valist = build_va_arg_indirect_ref (valist);
6698 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6699 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6700 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6702 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6704 type = build_pointer_type (type);
6705 size = int_size_in_bytes (type);
6706 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6708 nat_mode = type_natural_mode (type, NULL);
6717 /* Unnamed 256bit vector mode parameters are passed on stack. */
6718 if (ix86_cfun_abi () == SYSV_ABI)
6725 container = construct_container (nat_mode, TYPE_MODE (type),
6726 type, 0, X86_64_REGPARM_MAX,
6727 X86_64_SSE_REGPARM_MAX, intreg,
6732 /* Pull the value out of the saved registers. */
6734 addr = create_tmp_var (ptr_type_node, "addr");
6735 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6739 int needed_intregs, needed_sseregs;
6741 tree int_addr, sse_addr;
6743 lab_false = create_artificial_label ();
6744 lab_over = create_artificial_label ();
6746 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6748 need_temp = (!REG_P (container)
6749 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6750 || TYPE_ALIGN (type) > 128));
6752 /* In case we are passing structure, verify that it is consecutive block
6753 on the register save area. If not we need to do moves. */
6754 if (!need_temp && !REG_P (container))
6756 /* Verify that all registers are strictly consecutive */
6757 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6761 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6763 rtx slot = XVECEXP (container, 0, i);
6764 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6765 || INTVAL (XEXP (slot, 1)) != i * 16)
6773 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6775 rtx slot = XVECEXP (container, 0, i);
6776 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6777 || INTVAL (XEXP (slot, 1)) != i * 8)
6789 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6790 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6791 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6792 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6795 /* First ensure that we fit completely in registers. */
6798 t = build_int_cst (TREE_TYPE (gpr),
6799 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6800 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6801 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6802 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6803 gimplify_and_add (t, pre_p);
6807 t = build_int_cst (TREE_TYPE (fpr),
6808 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6809 + X86_64_REGPARM_MAX * 8);
6810 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6811 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6812 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6813 gimplify_and_add (t, pre_p);
6816 /* Compute index to start of area used for integer regs. */
6819 /* int_addr = gpr + sav; */
6820 t = fold_convert (sizetype, gpr);
6821 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6822 gimplify_assign (int_addr, t, pre_p);
6826 /* sse_addr = fpr + sav; */
6827 t = fold_convert (sizetype, fpr);
6828 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6829 gimplify_assign (sse_addr, t, pre_p);
6834 tree temp = create_tmp_var (type, "va_arg_tmp");
6837 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6838 gimplify_assign (addr, t, pre_p);
6840 for (i = 0; i < XVECLEN (container, 0); i++)
6842 rtx slot = XVECEXP (container, 0, i);
6843 rtx reg = XEXP (slot, 0);
6844 enum machine_mode mode = GET_MODE (reg);
6845 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6846 tree addr_type = build_pointer_type (piece_type);
6847 tree daddr_type = build_pointer_type_for_mode (piece_type,
6851 tree dest_addr, dest;
6853 if (SSE_REGNO_P (REGNO (reg)))
6855 src_addr = sse_addr;
6856 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6860 src_addr = int_addr;
6861 src_offset = REGNO (reg) * 8;
6863 src_addr = fold_convert (addr_type, src_addr);
6864 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6865 size_int (src_offset));
6866 src = build_va_arg_indirect_ref (src_addr);
6868 dest_addr = fold_convert (daddr_type, addr);
6869 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
6870 size_int (INTVAL (XEXP (slot, 1))));
6871 dest = build_va_arg_indirect_ref (dest_addr);
6873 gimplify_assign (dest, src, pre_p);
6879 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6880 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6881 gimplify_assign (gpr, t, pre_p);
6886 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6887 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6888 gimplify_assign (fpr, t, pre_p);
6891 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6893 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6896 /* ... otherwise out of the overflow area. */
6898 /* When we align parameter on stack for caller, if the parameter
6899 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6900 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6901 here with caller. */
6902 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6903 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6904 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6906 /* Care for on-stack alignment if needed. */
6907 if (arg_boundary <= 64
6908 || integer_zerop (TYPE_SIZE (type)))
6912 HOST_WIDE_INT align = arg_boundary / 8;
6913 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6914 size_int (align - 1));
6915 t = fold_convert (sizetype, t);
6916 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6918 t = fold_convert (TREE_TYPE (ovf), t);
6920 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6921 gimplify_assign (addr, t, pre_p);
6923 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6924 size_int (rsize * UNITS_PER_WORD));
6925 gimplify_assign (unshare_expr (ovf), t, pre_p);
6928 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6930 ptrtype = build_pointer_type (type);
6931 addr = fold_convert (ptrtype, addr);
6934 addr = build_va_arg_indirect_ref (addr);
6935 return build_va_arg_indirect_ref (addr);
6938 /* Return nonzero if OPNUM's MEM should be matched
6939 in movabs* patterns. */
6942 ix86_check_movabs (rtx insn, int opnum)
/* Examine the (first, if PARALLEL) SET of INSN, strip any SUBREG
   wrappers from operand OPNUM, and accept the MEM unless it is a
   volatile reference while volatile operands are disallowed.  */
6946 set = PATTERN (insn);
6947 if (GET_CODE (set) == PARALLEL)
6948 set = XVECEXP (set, 0, 0);
6949 gcc_assert (GET_CODE (set) == SET);
6950 mem = XEXP (set, opnum);
/* Paradoxical/normal SUBREGs may wrap the actual memory reference.  */
6951 while (GET_CODE (mem) == SUBREG)
6952 mem = SUBREG_REG (mem);
6953 gcc_assert (MEM_P (mem));
6954 return (volatile_ok || !MEM_VOLATILE_P (mem));
6957 /* Initialize the table of extra 80387 mathematical constants. */
6960 init_ext_80387_constants (void)
/* The five decimal strings below correspond, in order, to the values
   loaded by the x87 instructions fldlg2, fldln2, fldl2e, fldl2t and
   fldpi.  They are parsed once and cached in
   ext_80387_constants_table.  */
6962 static const char * cst[5] =
6964 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6965 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6966 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6967 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6968 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6972 for (i = 0; i < 5; i++)
6974 real_from_string (&ext_80387_constants_table[i], cst[i]);
6975 /* Ensure each constant is rounded to XFmode precision. */
6976 real_convert (&ext_80387_constants_table[i],
6977 XFmode, &ext_80387_constants_table[i]);
/* Mark the table initialized so it is only built once.  */
6980 ext_80387_constants_init = 1;
6983 /* Return true if the constant is something that can be loaded with
6984 a special instruction. */
6987 standard_80387_constant_p (rtx x)
/* Despite the comment, this returns an integer code (not a plain
   boolean) identifying which single x87 load instruction (fldz, fld1,
   fldlg2, ..., or a fld/fchs pair) can materialize X; the exact code
   values are consumed by standard_80387_constant_opcode/rtx.  */
6989 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs are candidates.  */
6993 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6996 if (x == CONST0_RTX (mode))
6998 if (x == CONST1_RTX (mode))
7001 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7003 /* For XFmode constants, try to find a special 80387 instruction when
7004 optimizing for size or on those CPUs that benefit from them. */
7006 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7010 if (! ext_80387_constants_init)
7011 init_ext_80387_constants ();
7013 for (i = 0; i < 5; i++)
7014 if (real_identical (&r, &ext_80387_constants_table[i]))
7018 /* Load of the constant -0.0 or -1.0 will be split as
7019 fldz;fchs or fld1;fchs sequence. */
7020 if (real_isnegzero (&r))
7022 if (real_identical (&r, &dconstm1))
7028 /* Return the opcode of the special instruction to be used to load
7032 standard_80387_constant_opcode (rtx x)
/* Dispatch on the code returned by standard_80387_constant_p to the
   matching assembler mnemonic (fldz, fld1, fldlg2, fldln2, fldl2e,
   fldl2t, fldpi).  X must satisfy standard_80387_constant_p.  */
7034 switch (standard_80387_constant_p (x))
7058 /* Return the CONST_DOUBLE representing the 80387 constant that is
7059 loaded by the specified special instruction. The argument IDX
7060 matches the return value from standard_80387_constant_p. */
7063 standard_80387_constant_rtx (int idx)
/* Lazily build the constant table, then hand back the cached
   REAL_VALUE as a CONST_DOUBLE rtx.  */
7067 if (! ext_80387_constants_init)
7068 init_ext_80387_constants ();
7084 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7088 /* Return 1 if mode is a valid mode for sse. */
/* Predicate used by standard_sse_constant_p below to decide whether
   MODE is one of the 128-bit SSE vector/scalar modes.  */
7090 standard_sse_mode_p (enum machine_mode mode)
7107 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7108 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7109 modes and AVX is enabled. */
7112 standard_sse_constant_p (rtx x)
7114 enum machine_mode mode = GET_MODE (x);
/* All-zeros is loadable with a single xor regardless of ISA level.  */
7116 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones needs pcmpeqd, hence SSE2 (or AVX for 256-bit modes); a
   negative code flags "would be special, but ISA not enabled".  */
7118 if (vector_all_ones_operand (x, mode))
7120 if (standard_sse_mode_p (mode))
7121 return TARGET_SSE2 ? 2 : -2;
7122 else if (VALID_AVX256_REG_MODE (mode))
7123 return TARGET_AVX ? 3 : -3;
7129 /* Return the opcode of the special instruction to be used to load
7133 standard_sse_constant_opcode (rtx insn, rtx x)
/* Pick the assembler template by constant kind (zero vs. all-ones,
   from standard_sse_constant_p) and by the insn's mode attribute,
   preferring the VEX-encoded form when AVX is enabled.  */
7135 switch (standard_sse_constant_p (x))
7138 switch (get_attr_mode (insn))
7141 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7143 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7145 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7147 return "vxorps\t%x0, %x0, %x0";
7149 return "vxorpd\t%x0, %x0, %x0";
7151 return "vpxor\t%x0, %x0, %x0";
/* All-ones constants are produced with a compare-equal of the
   register against itself.  */
7157 switch (get_attr_mode (insn))
7162 return "vpcmpeqd\t%0, %0, %0";
7168 return "pcmpeqd\t%0, %0";
7173 /* Returns 1 if OP contains a symbol reference */
7176 symbolic_reference_mentioned_p (rtx op)
/* Recursively walk the rtx: a SYMBOL_REF or LABEL_REF anywhere in the
   expression (including inside rtx vectors, format 'E') counts.  */
7181 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7184 fmt = GET_RTX_FORMAT (GET_CODE (op));
7185 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7191 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7192 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7196 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7203 /* Return 1 if it is appropriate to emit `ret' instructions in the
7204 body of a function. Do this only if the epilogue is simple, needing a
7205 couple of insns. Prior to reloading, we can't tell how many registers
7206 must be saved, so return 0 then. Return 0 if there is no frame
7207 marker to de-allocate. */
7210 ix86_can_use_return_insn_p (void)
7212 struct ix86_frame frame;
7214 if (! reload_completed || frame_pointer_needed)
7217 /* Don't allow more than 32 pop, since that's all we can do
7218 with one instruction. */
/* NOTE: 32768 here is the 16-bit immediate limit of "ret N", i.e. 32k
   bytes of popped arguments, matching the comment's intent.  */
7219 if (crtl->args.pops_args
7220 && crtl->args.size >= 32768)
/* A bare `ret' is only valid when nothing (locals or saved regs)
   remains to be deallocated.  */
7223 ix86_compute_frame_layout (&frame);
7224 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7227 /* Value should be nonzero if functions must have frame pointers.
7228 Zero means the frame pointer need not be set up (and parms may
7229 be accessed via the stack pointer) in functions that seem suitable. */
7232 ix86_frame_pointer_required (void)
7234 /* If we accessed previous frames, then the generated code expects
7235 to be able to access the saved ebp value in our frame. */
7236 if (cfun->machine->accesses_prev_frame)
7239 /* Several x86 os'es need a frame pointer for other reasons,
7240 usually pertaining to setjmp. */
7241 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7244 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7245 the frame pointer by default. Turn it back on now if we've not
7246 got a leaf function. */
/* TLS descriptor calls behave like calls for this purpose, so a
   function making them is not treated as a leaf.  */
7247 if (TARGET_OMIT_LEAF_FRAME_POINTER
7248 && (!current_function_is_leaf
7249 || ix86_current_function_calls_tls_descriptor))
7258 /* Record that the current function accesses previous call frames. */
7261 ix86_setup_frame_addresses (void)
/* Checked by ix86_frame_pointer_required above to force %ebp setup.  */
7263 cfun->machine->accesses_prev_frame = 1;
#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
# define USE_HIDDEN_LINKONCE 1
# define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a get-PC thunk has been requested;
   bit N set means the thunk for hard register N must be emitted by
   ix86_file_end.  */
static int pic_labels_used;
7274 /* Fills in the label name that should be used for a pc thunk for
7275 the given register. */
7278 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are only used for 32-bit PIC; 64-bit uses RIP-relative
   addressing instead.  */
7280 gcc_assert (!TARGET_64BIT);
7282 if (USE_HIDDEN_LINKONCE)
7283 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7285 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7289 /* This function generates code for -fpic that loads %ebx with
7290 the return address of the caller and then returns. */
7293 ix86_file_end (void)
/* Emit, at end of compilation, one get-PC thunk per hard register
   recorded in pic_labels_used by output_set_got.  Each thunk is
   "mov (%esp), %REG; ret", i.e. it copies its own return address into
   REG.  Depending on target support it is placed in a coalesced
   (Darwin), COMDAT/link-once, or plain text section.  */
7298 for (regno = 0; regno < 8; ++regno)
7302 if (! ((pic_labels_used >> regno) & 1))
7305 get_pc_thunk_name (name, regno);
/* Darwin: weak definition in the coalesced text section.  */
7310 switch_to_section (darwin_sections[text_coal_section]);
7311 fputs ("\t.weak_definition\t", asm_out_file);
7312 assemble_name (asm_out_file, name);
7313 fputs ("\n\t.private_extern\t", asm_out_file);
7314 assemble_name (asm_out_file, name);
7315 fputs ("\n", asm_out_file);
7316 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF with .hidden support: emit a global, hidden, one-only
   function so multiple objects share a single thunk copy.  */
7320 if (USE_HIDDEN_LINKONCE)
7324 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7326 TREE_PUBLIC (decl) = 1;
7327 TREE_STATIC (decl) = 1;
7328 DECL_ONE_ONLY (decl) = 1;
7330 (*targetm.asm_out.unique_section) (decl, 0);
7331 switch_to_section (get_named_section (decl, NULL, 0));
7333 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7334 fputs ("\t.hidden\t", asm_out_file);
7335 assemble_name (asm_out_file, name);
7336 fputc ('\n', asm_out_file);
7337 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: a local label in the ordinary text section.  */
7341 switch_to_section (text_section);
7342 ASM_OUTPUT_LABEL (asm_out_file, name);
/* The thunk body: load the return address (at the top of the
   stack) into the target register, then return.  */
7345 xops[0] = gen_rtx_REG (Pmode, regno);
7346 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7347 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7348 output_asm_insn ("ret", xops);
7351 if (NEED_INDICATE_EXEC_STACK)
7352 file_end_indicate_exec_stack ();
7355 /* Emit code for the SET_GOT patterns. */
7358 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* Emit the assembly that loads the GOT base address into DEST.
   Three strategies: (1) VxWorks RTP reads the GOT pointer from the
   GOTT_BASE/GOTT_INDEX tables; (2) without deep branch prediction, a
   call-to-next-insn/pop sequence; (3) otherwise a call to the per
   register get-PC thunk.  In the non-VxWorks cases the GOT offset
   _GLOBAL_OFFSET_TABLE_ is then added to the loaded PC.  */
7364 if (TARGET_VXWORKS_RTP && flag_pic)
7366 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7367 xops[2] = gen_rtx_MEM (Pmode,
7368 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7369 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7371 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7372 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7373 an unadorned address. */
7374 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7375 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7376 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7380 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7382 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Classic call/pop idiom: call the next instruction so the return
   address (the PC) lands on the stack, then pop it into DEST.  */
7384 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7387 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7389 output_asm_insn ("call\t%a2", xops);
7392 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7393 is what will be referenced by the Mach-O PIC subsystem. */
7395 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7398 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7399 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7402 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction targets: use a shared thunk instead of the
   call/pop pair, which would unbalance the return-address stack.
   Record the thunk so ix86_file_end emits it.  */
7407 get_pc_thunk_name (name, REGNO (dest));
7408 pic_labels_used |= 1 << REGNO (dest);
7410 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7411 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7412 output_asm_insn ("call\t%X2", xops);
7413 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7414 is what will be referenced by the Mach-O PIC subsystem. */
7417 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7419 targetm.asm_out.internal_label (asm_out_file, "L",
7420 CODE_LABEL_NUMBER (label));
/* Finally add the GOT displacement to the PC value now in DEST.  */
7427 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7428 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7430 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7435 /* Generate a "push" pattern for input ARG, i.e. a SET that stores ARG
   through a pre-decremented stack pointer. */
7440 return gen_rtx_SET (VOIDmode,
7442 gen_rtx_PRE_DEC (Pmode,
7443 stack_pointer_rtx)),
7447 /* Return >= 0 if there is an unused call-clobbered register available
7448 for the entire function. */
7451 ix86_select_alt_pic_regnum (void)
/* Only safe in a leaf function that is not profiled and makes no TLS
   descriptor calls, since call-clobbered registers could otherwise be
   destroyed across calls.  */
7453 if (current_function_is_leaf && !crtl->profile
7454 && !ix86_current_function_calls_tls_descriptor)
7457 /* Can't use the same register for both PIC and DRAP. */
7459 drap = REGNO (crtl->drap_reg);
/* Scan %ecx, %edx, %eax (regnos 2..0) for one never live.  */
7462 for (i = 2; i >= 0; --i)
7463 if (i != drap && !df_regs_ever_live_p (i))
7467 return INVALID_REGNUM;
7470 /* Return 1 if we need to save REGNO. */
7472 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is actually used (directly,
   for EH return, or via the constant pool) and no spare
   call-clobbered register can stand in for it.  */
7474 if (pic_offset_table_rtx
7475 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7476 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7478 || crtl->calls_eh_return
7479 || crtl->uses_const_pool))
7481 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* When __builtin_eh_return is used, the EH data registers must be
   treated as saved if the caller asked for that view.  */
7486 if (crtl->calls_eh_return && maybe_eh_return)
7491 unsigned test = EH_RETURN_DATA_REGNO (i);
7492 if (test == INVALID_REGNUM)
/* The DRAP register is saved/restored by the realignment prologue.  */
7500 && regno == REGNO (crtl->drap_reg))
/* Otherwise: save any live, call-saved, non-fixed register, except
   the hard frame pointer when it is already maintained as such.  */
7503 return (df_regs_ever_live_p (regno)
7504 && !call_used_regs[regno]
7505 && !fixed_regs[regno]
7506 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7509 /* Return number of saved general purpose registers. */
7512 ix86_nsaved_regs (void)
/* Count hard registers (excluding SSE regs) that ix86_save_reg says
   need saving in the prologue.  */
7517 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7518 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7523 /* Return number of saved SSE registers. */
7526 ix86_nsaved_sseregs (void)
/* SSE registers are only call-saved under the MS (Win64) ABI; any
   other ABI saves none of them.  */
7531 if (ix86_cfun_abi () != MS_ABI)
7533 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7534 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7539 /* Given FROM and TO register numbers, say whether this elimination is
7540 allowed. If stack alignment is needed, we can only replace argument
7541 pointer with hard frame pointer, or replace frame pointer with stack
7542 pointer. Otherwise, frame pointer elimination is automatically
7543 handled and all other eliminations are valid. */
7546 ix86_can_eliminate (int from, int to)
7548 if (stack_realign_fp)
7549 return ((from == ARG_POINTER_REGNUM
7550 && to == HARD_FRAME_POINTER_REGNUM)
7551 || (from == FRAME_POINTER_REGNUM
7552 && to == STACK_POINTER_REGNUM))
7554 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7557 /* Return the offset between two registers, one to be eliminated, and the other
7558 its replacement, at the start of a routine. */
7561 ix86_initial_elimination_offset (int from, int to)
7563 struct ix86_frame frame;
7564 ix86_compute_frame_layout (&frame);
/* Offsets come straight out of the computed frame layout; the soft
   frame pointer differs from the hard one by the distance between
   their layout offsets.  */
7566 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7567 return frame.hard_frame_pointer_offset;
7568 else if (from == FRAME_POINTER_REGNUM
7569 && to == HARD_FRAME_POINTER_REGNUM)
7570 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7573 gcc_assert (to == STACK_POINTER_REGNUM);
7575 if (from == ARG_POINTER_REGNUM)
7576 return frame.stack_pointer_offset;
7578 gcc_assert (from == FRAME_POINTER_REGNUM);
7579 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7583 /* In a dynamically-aligned function, we can't know the offset from
7584 stack pointer to frame pointer, so we must ensure that setjmp
7585 eliminates fp against the hard fp (%ebp) rather than trying to
7586 index from %esp up to the top of the frame across a gap that is
7587 of unknown (at compile-time) size. */
7589 ix86_builtin_setjmp_frame_value (void)
7591 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7594 /* Fill structure ix86_frame about frame of currently computed function. */
7597 ix86_compute_frame_layout (struct ix86_frame *frame)
7599 HOST_WIDE_INT total_size;
7600 unsigned int stack_alignment_needed;
7601 HOST_WIDE_INT offset;
7602 unsigned int preferred_alignment;
7603 HOST_WIDE_INT size = get_frame_size ();
/* How many GP and SSE registers the prologue must save.  */
7605 frame->nregs = ix86_nsaved_regs ();
7606 frame->nsseregs = ix86_nsaved_sseregs ();
/* Alignment bookkeeping is done in bytes below, crtl keeps bits.  */
7609 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7610 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7612 /* MS ABI seems to require stack alignment to be always 16 except for function
7614 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7616 preferred_alignment = 16;
7617 stack_alignment_needed = 16;
7618 crtl->preferred_stack_boundary = 128;
7619 crtl->stack_alignment_needed = 128;
7622 gcc_assert (!size || stack_alignment_needed);
7623 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7624 gcc_assert (preferred_alignment <= stack_alignment_needed);
7626 /* During reload iteration the amount of registers saved can change.
7627 Recompute the value as needed. Do not recompute when amount of registers
7628 didn't change as reload does multiple calls to the function and does not
7629 expect the decision to change within single iteration. */
7630 if (!optimize_function_for_size_p (cfun)
7631 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7633 int count = frame->nregs;
7635 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7636 /* The fast prologue uses move instead of push to save registers. This
7637 is significantly longer, but also executes faster as modern hardware
7638 can execute the moves in parallel, but can't do that for push/pop.
7640 Be careful about choosing what prologue to emit: When function takes
7641 many instructions to execute we may use slow version as well as in
7642 case function is known to be outside hot spot (this is known with
7643 feedback only). Weight the size of function by number of registers
7644 to save as it is cheap to use one or two push instructions but very
7645 slow to use many of them. */
7647 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7648 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7649 || (flag_branch_probabilities
7650 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7651 cfun->machine->use_fast_prologue_epilogue = false;
7653 cfun->machine->use_fast_prologue_epilogue
7654 = !expensive_function_p (count);
7656 if (TARGET_PROLOGUE_USING_MOVE
7657 && cfun->machine->use_fast_prologue_epilogue)
7658 frame->save_regs_using_mov = true;
7660 frame->save_regs_using_mov = false;
/* From here on, OFFSET walks down the frame from the return address,
   accumulating each area's size and recording the layout in *FRAME.  */
7663 /* Skip return address and saved base pointer. */
7664 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7666 frame->hard_frame_pointer_offset = offset;
7668 /* Set offset to aligned because the realigned frame starts from
7670 if (stack_realign_fp)
7671 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7673 /* Register save area */
7674 offset += frame->nregs * UNITS_PER_WORD;
7676 /* Align SSE reg save area. */
7677 if (frame->nsseregs)
7678 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7680 frame->padding0 = 0;
7682 /* SSE register save area (16 bytes per register, 16-byte aligned). */
7683 offset += frame->padding0 + frame->nsseregs * 16;
/* Register-save area for variadic functions.  */
7686 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7687 offset += frame->va_arg_size;
7689 /* Align start of frame for local function. */
7690 frame->padding1 = ((offset + stack_alignment_needed - 1)
7691 & -stack_alignment_needed) - offset;
7693 offset += frame->padding1;
7695 /* Frame pointer points here. */
7696 frame->frame_pointer_offset = offset;
7700 /* Add outgoing arguments area. Can be skipped if we eliminated
7701 all the function calls as dead code.
7702 Skipping is however impossible when function calls alloca. Alloca
7703 expander assumes that last crtl->outgoing_args_size
7704 of stack frame are unused. */
7705 if (ACCUMULATE_OUTGOING_ARGS
7706 && (!current_function_is_leaf || cfun->calls_alloca
7707 || ix86_current_function_calls_tls_descriptor))
7709 offset += crtl->outgoing_args_size;
7710 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7713 frame->outgoing_arguments_size = 0;
7715 /* Align stack boundary. Only needed if we're calling another function
7717 if (!current_function_is_leaf || cfun->calls_alloca
7718 || ix86_current_function_calls_tls_descriptor)
7719 frame->padding2 = ((offset + preferred_alignment - 1)
7720 & -preferred_alignment) - offset;
7722 frame->padding2 = 0;
7724 offset += frame->padding2;
7726 /* We've reached end of stack frame. */
7727 frame->stack_pointer_offset = offset;
7729 /* Size prologue needs to allocate. */
7730 frame->to_allocate =
7731 (size + frame->padding1 + frame->padding2
7732 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny or huge frames save registers with push: one or two pushes are
   cheap, and a >=2GB 64-bit displacement cannot be encoded in a mov. */
7734 if ((!frame->to_allocate && frame->nregs <= 1)
7735 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7736 frame->save_regs_using_mov = false;
/* A leaf function that never moves %rsp may place its frame in the
   red zone below the stack pointer, capped at the usable red-zone
   size, avoiding the explicit stack adjustment entirely.  */
7738 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7739 && current_function_is_leaf
7740 && !ix86_current_function_calls_tls_descriptor)
7742 frame->red_zone_size = frame->to_allocate;
7743 if (frame->save_regs_using_mov)
7744 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7745 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7746 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7749 frame->red_zone_size = 0;
7750 frame->to_allocate -= frame->red_zone_size;
7751 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout; presumably compiled only under a
   debug guard elided from this listing -- TODO confirm.  */
7753 fprintf (stderr, "\n");
7754 fprintf (stderr, "size: %ld\n", (long)size);
7755 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7756 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7757 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7758 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7759 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7760 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7761 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7762 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7763 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7764 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7765 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7766 (long)frame->hard_frame_pointer_offset);
7767 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7768 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7769 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7770 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7774 /* Emit code to save registers in the prologue. */
7777 ix86_emit_save_regs (void)
/* Push each to-be-saved general-purpose register, highest regno
   first, marking each insn frame-related for DWARF CFI.  */
7782 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7783 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7785 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7786 RTX_FRAME_RELATED_P (insn) = 1;
7790 /* Emit code to save registers using MOV insns. First register
7791 is stored at POINTER + OFFSET, subsequent ones at successive
words. */
7793 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7798 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7799 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7801 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7803 gen_rtx_REG (Pmode, regno));
7804 RTX_FRAME_RELATED_P (insn) = 1;
7805 offset += UNITS_PER_WORD;
7809 /* Emit code to save SSE registers using MOV insns. First register
7810 is stored at POINTER + OFFSET, subsequent ones 16 bytes apart. */
7812 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7818 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7819 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Each slot is a 16-byte-aligned TImode store (full XMM width).  */
7821 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7822 set_mem_align (mem, 128);
7823 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7824 RTX_FRAME_RELATED_P (insn) = 1;
7829 /* Expand prologue or epilogue stack adjustment.
7830 The pattern exist to put a dependency on all ebp-based memory accesses.
7831 STYLE should be negative if instructions should be marked as frame related,
7832 zero if %r11 register is live and cannot be freely used and positive
7836 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7841 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit immediates that fit a sign-extended 32-bit field can be used
   directly; larger ones must go through a scratch register.  */
7842 else if (x86_64_immediate_operand (offset, DImode))
7843 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7847 /* r11 is used by indirect sibcall return as well, set before the
7848 epilogue and used after the epilogue. ATM indirect sibcall
7849 shouldn't be used together with huge frame sizes in one
7850 function because of the frame_size check in sibcall.c. */
7852 r11 = gen_rtx_REG (DImode, R11_REG);
7853 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7855 RTX_FRAME_RELATED_P (insn) = 1;
7856 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7860 RTX_FRAME_RELATED_P (insn) = 1;
7863 /* Find an available register to be used as dynamic realign argument
7864 pointer register. Such a register will be written in prologue and
7865 used in begin of body, so it must not be
7866 1. parameter passing register.
7868 We reuse static-chain register if it is available. Otherwise, we
7869 use DI for i386 and R13 for x86-64. We chose R13 since it has
7872 Return: the regno of chosen register. */
7875 find_drap_reg (void)
7877 tree decl = cfun->decl;
7881 /* Use R13 for nested function or function need static chain.
7882 Since function with tail call may use any caller-saved
7883 registers in epilogue, DRAP must not use caller-saved
7884 register in such case. */
7885 if ((decl_function_context (decl)
7886 && !DECL_NO_STATIC_CHAIN (decl))
7887 || crtl->tail_call_emit)
7894 /* Use DI for nested function or function need static chain.
7895 Since function with tail call may use any caller-saved
7896 registers in epilogue, DRAP must not use caller-saved
7897 register in such case. */
7898 if ((decl_function_context (decl)
7899 && !DECL_NO_STATIC_CHAIN (decl))
7900 || crtl->tail_call_emit)
7903 /* Reuse static chain register (%ecx) only when it is not needed for
   parameter passing: fastcall and regparm(3) both claim %ecx.  */
7905 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7906 && !lookup_attribute ("fastcall",
7907 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7914 /* Update incoming stack boundary and estimated stack alignment. */
7917 ix86_update_stack_boundary (void)
7919 /* Prefer the one specified at command line. */
7920 ix86_incoming_stack_boundary
7921 = (ix86_user_incoming_stack_boundary
7922 ? ix86_user_incoming_stack_boundary
7923 : ix86_default_incoming_stack_boundary);
7925 /* Incoming stack alignment can be changed on individual functions
7926 via force_align_arg_pointer attribute. We use the smallest
7927 incoming stack boundary. */
7928 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7929 && lookup_attribute (ix86_force_align_arg_pointer_string,
7930 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7931 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7933 /* The incoming stack frame has to be aligned at least at
7934 parm_stack_boundary. */
7935 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
7936 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
7938 /* Stack at entrance of main is aligned by runtime. We use the
7939 smallest incoming stack boundary. */
7940 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7941 && DECL_NAME (current_function_decl)
7942 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7943 && DECL_FILE_SCOPE_P (current_function_decl))
7944 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7946 /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes, required so the XMM register save area in the
   varargs register save block is properly aligned.  */
7950 && crtl->stack_alignment_estimated < 128)
7951 crtl->stack_alignment_estimated = 128;
7954 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7955 needed or an rtx for DRAP otherwise. */
7958 ix86_get_drap_rtx (void)
/* Forcing DRAP (or not accumulating outgoing args) means the dynamic
   realign argument pointer is needed whenever realignment happens.  */
7960 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7961 crtl->need_drap = true;
7963 if (stack_realign_drap)
7965 /* Assign DRAP to vDRAP and returns vDRAP */
7966 unsigned int regno = find_drap_reg ();
7971 arg_ptr = gen_rtx_REG (Pmode, regno);
7972 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body addresses arguments through the virtual copy.  */
7975 drap_vreg = copy_to_reg (arg_ptr);
7979 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7980 RTX_FRAME_RELATED_P (insn) = 1;
7987 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7990 ix86_internal_arg_pointer (void)
7992 return virtual_incoming_args_rtx;
7995 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7996 This is called from dwarf2out.c to emit call frame instructions
7997 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7999 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8001 rtx unspec = SET_SRC (pattern);
8002 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Translate the two frame-related UNSPECs used by the prologue into
   the corresponding dwarf2out CFI calls.  */
8006 case UNSPEC_REG_SAVE:
8007 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8008 SET_DEST (pattern));
8010 case UNSPEC_DEF_CFA:
8011 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8012 INTVAL (XVECEXP (unspec, 0, 0)));
8019 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8020 to be generated in correct form. */
8022 ix86_finalize_stack_realign_flags (void)
8024 /* Check if stack realign is really needed after reload, and
8025 stores result in cfun */
/* Effective incoming boundary is the larger of the parameter-area
   boundary and the general incoming stack boundary.  */
8026 unsigned int incoming_stack_boundary
8027 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8028 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realignment is needed when required alignment (leaf functions only
   need max_used_stack_slot_alignment) exceeds the incoming boundary.  */
8029 unsigned int stack_realign = (incoming_stack_boundary
8030 < (current_function_is_leaf
8031 ? crtl->max_used_stack_slot_alignment
8032 : crtl->stack_alignment_needed));
8034 if (crtl->stack_realign_finalized)
8036 /* After stack_realign_needed is finalized, we can no longer
8038 gcc_assert (crtl->stack_realign_needed == stack_realign);
8042 crtl->stack_realign_needed = stack_realign;
8043 crtl->stack_realign_finalized = true;
8047 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): the embedded line numbers are discontinuous throughout
   this function; local declarations (x, y, insn, t, pic_reg_used, ...)
   and several braces fall in the omitted lines -- verify against the
   complete i386.c before editing.  */
8050 ix86_expand_prologue (void)
8054 struct ix86_frame frame;
8055 HOST_WIDE_INT allocate;
8057 ix86_finalize_stack_realign_flags ();
8059 /* DRAP should not coexist with stack_realign_fp */
8060 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8062 ix86_compute_frame_layout (&frame);
8064 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8065 of DRAP is needed and stack realignment is really needed after reload */
8066 if (crtl->drap_reg && crtl->stack_realign_needed)
8069 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8070 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8071 ? 0 : UNITS_PER_WORD);
8073 gcc_assert (stack_realign_drap);
8075 /* Grab the argument pointer. */
8076 x = plus_constant (stack_pointer_rtx,
8077 (UNITS_PER_WORD + param_ptr_offset))
8080 /* Only need to push parameter pointer reg if it is caller
8082 if (!call_used_regs[REGNO (crtl->drap_reg)])
8084 /* Push arg pointer reg */
8085 insn = emit_insn (gen_push (y));
8086 RTX_FRAME_RELATED_P (insn) = 1;
8089 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8090 RTX_FRAME_RELATED_P (insn) = 1;
8092 /* Align the stack. */
8093 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8095 GEN_INT (-align_bytes)));
8096 RTX_FRAME_RELATED_P (insn) = 1;
8098 /* Replicate the return address on the stack so that return
8099 address can be reached via (argp - 1) slot. This is needed
8100 to implement macro RETURN_ADDR_RTX and intrinsic function
8101 expand_builtin_return_addr etc. */
8103 x = gen_frame_mem (Pmode,
8104 plus_constant (x, -UNITS_PER_WORD));
8105 insn = emit_insn (gen_push (x));
8106 RTX_FRAME_RELATED_P (insn) = 1;
8109 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8110 slower on all targets. Also sdb doesn't like it. */
8112 if (frame_pointer_needed)
8114 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8115 RTX_FRAME_RELATED_P (insn) = 1;
8117 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8118 RTX_FRAME_RELATED_P (insn) = 1;
8121 if (stack_realign_fp)
8123 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8124 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8126 /* Align the stack. */
8127 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8129 GEN_INT (-align_bytes)));
8130 RTX_FRAME_RELATED_P (insn) = 1;
/* Total frame bytes to allocate: locals/outgoing args, SSE save area,
   and padding; GP register saves are added below.  */
8133 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8135 if (!frame.save_regs_using_mov)
8136 ix86_emit_save_regs ();
8138 allocate += frame.nregs * UNITS_PER_WORD;
8140 /* When using red zone we may start register saving before allocating
8141 the stack frame saving one cycle of the prologue. However I will
8142 avoid doing this if I am going to have to probe the stack since
8143 at least on x86_64 the stack probe can turn into a call that clobbers
8144 a red zone location */
8145 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8146 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8147 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8148 && !crtl->stack_realign_needed)
8149 ? hard_frame_pointer_rtx
8150 : stack_pointer_rtx,
8151 -frame.nregs * UNITS_PER_WORD);
8155 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8156 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8157 GEN_INT (-allocate), -1);
8160 /* Only valid for Win32. */
8161 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8165 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8167 if (cfun->machine->call_abi == MS_ABI)
8170 eax_live = ix86_eax_live_at_start_p ();
/* EAX is live at entry here; preserve it around the stack-probe call
   by pushing it and shrinking the allocation by one word.  */
8174 emit_insn (gen_push (eax));
8175 allocate -= UNITS_PER_WORD;
8178 emit_move_insn (eax, GEN_INT (allocate));
8181 insn = gen_allocate_stack_worker_64 (eax, eax);
8183 insn = gen_allocate_stack_worker_32 (eax, eax);
8184 insn = emit_insn (insn);
8185 RTX_FRAME_RELATED_P (insn) = 1;
/* The worker clobbers flags and does the adjustment internally, so
   attach an explicit sp = sp - allocate note for the dwarf machinery.  */
8186 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8187 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8188 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8189 t, REG_NOTES (insn));
8193 if (frame_pointer_needed)
8194 t = plus_constant (hard_frame_pointer_rtx,
8197 - frame.nregs * UNITS_PER_WORD);
8199 t = plus_constant (stack_pointer_rtx, allocate);
8200 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8204 if (frame.save_regs_using_mov
8205 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8206 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8208 if (!frame_pointer_needed
8209 || !frame.to_allocate
8210 || crtl->stack_realign_needed)
8211 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8213 + frame.nsseregs * 16 + frame.padding0);
8215 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8216 -frame.nregs * UNITS_PER_WORD);
8218 if (!frame_pointer_needed
8219 || !frame.to_allocate
8220 || crtl->stack_realign_needed)
8221 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8224 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8225 - frame.nregs * UNITS_PER_WORD
8226 - frame.nsseregs * 16
/* Set up the PIC register if this function references the GOT.  */
8229 pic_reg_used = false;
8230 if (pic_offset_table_rtx
8231 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8234 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8236 if (alt_pic_reg_used != INVALID_REGNUM)
8237 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8239 pic_reg_used = true;
8246 if (ix86_cmodel == CM_LARGE_PIC)
8248 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8249 rtx label = gen_label_rtx ();
8251 LABEL_PRESERVE_P (label) = 1;
8252 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8253 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8254 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8255 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8256 pic_offset_table_rtx, tmp_reg));
8259 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8262 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8265 /* In the pic_reg_used case, make sure that the got load isn't deleted
8266 when mcount needs it. Blockage to avoid call movement across mcount
8267 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8269 if (crtl->profile && pic_reg_used)
8270 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8272 if (crtl->drap_reg && !crtl->stack_realign_needed)
8274 /* vDRAP is setup but after reload it turns out stack realign
8275 isn't necessary, here we will emit prologue to setup DRAP
8276 without stack realign adjustment */
8277 int drap_bp_offset = UNITS_PER_WORD * 2;
8278 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8279 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8282 /* Prevent instructions from being scheduled into register save push
8283 sequence when access to the redzone area is done through frame pointer.
8284 The offset between the frame pointer and the stack pointer is calculated
8285 relative to the value of the stack pointer at the end of the function
8286 prologue, and moving instructions that access redzone area via frame
8287 pointer inside push sequence violates this assumption. */
8288 if (frame_pointer_needed && frame.red_zone_size)
8289 emit_insn (gen_memory_blockage ());
8291 /* Emit cld instruction if stringops are used in the function. */
8292 if (TARGET_CLD && ix86_current_function_needs_cld)
8293 emit_insn (gen_cld ());
8296 /* Emit code to restore saved registers using MOV insns. First register
8297 is restored from POINTER + OFFSET. */
8299 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8300 int maybe_eh_return)
8303 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk all hard registers and reload every call-saved non-SSE register
   that this function actually saved (ix86_save_reg decides).  */
8305 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8306 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8308 /* Ensure that adjust_address won't be forced to produce pointer
8309 out of range allowed by x86-64 instruction set. */
8310 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a signed 32-bit displacement: materialize
   POINTER + OFFSET in r11 and address relative to that instead.  */
8314 r11 = gen_rtx_REG (DImode, R11_REG);
8315 emit_move_insn (r11, GEN_INT (offset));
8316 emit_insn (gen_adddi3 (r11, r11, pointer));
8317 base_address = gen_rtx_MEM (Pmode, r11);
8320 emit_move_insn (gen_rtx_REG (Pmode, regno),
8321 adjust_address (base_address, Pmode, offset));
8322 offset += UNITS_PER_WORD;
8326 /* Emit code to restore saved registers using MOV insns. First register
8327 is restored from POINTER + OFFSET. */
/* Same as ix86_emit_restore_regs_using_mov, but for the SSE registers:
   each slot is 16 bytes (TImode) and aligned to 128 bits.  */
8329 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8330 int maybe_eh_return)
8333 rtx base_address = gen_rtx_MEM (TImode, pointer);
8336 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8337 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8339 /* Ensure that adjust_address won't be forced to produce pointer
8340 out of range allowed by x86-64 instruction set. */
8341 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8345 r11 = gen_rtx_REG (DImode, R11_REG);
8346 emit_move_insn (r11, GEN_INT (offset));
8347 emit_insn (gen_adddi3 (r11, r11, pointer));
8348 base_address = gen_rtx_MEM (TImode, r11);
/* The SSE save area is 128-bit aligned, so record that on the MEM to
   allow aligned moves.  */
8351 mem = adjust_address (base_address, TImode, offset);
8352 set_mem_align (mem, 128);
8353 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8358 /* Restore function stack, frame, and registers. */
/* NOTE(review): the embedded line numbers are discontinuous in this
   function (declarations, several braces and the offset increment loop
   fall in omitted lines) -- verify against the complete i386.c.  */
8361 ix86_expand_epilogue (int style)
8365 struct ix86_frame frame;
8366 HOST_WIDE_INT offset;
8368 ix86_finalize_stack_realign_flags ();
8370 /* When stack is realigned, SP must be valid. */
8371 sp_valid = (!frame_pointer_needed
8372 || current_function_sp_is_unchanging
8373 || stack_realign_fp);
8375 ix86_compute_frame_layout (&frame);
8377 /* See the comment about red zone and frame
8378 pointer usage in ix86_expand_prologue. */
8379 if (frame_pointer_needed && frame.red_zone_size)
8380 emit_insn (gen_memory_blockage ());
8382 /* Calculate start of saved registers relative to ebp. Special care
8383 must be taken for the normal return case of a function using
8384 eh_return: the eax and edx registers are marked as saved, but not
8385 restored along this path. */
8386 offset = frame.nregs;
8387 if (crtl->calls_eh_return && style != 2)
8389 offset *= -UNITS_PER_WORD;
8390 offset -= frame.nsseregs * 16 + frame.padding0;
8392 /* If we're only restoring one register and sp is not valid then
8393 using a move instruction to restore the register since it's
8394 less work than reloading sp and popping the register.
8396 The default code results in stack adjustment using add/lea instruction,
8397 while this code results in LEAVE instruction (or discrete equivalent),
8398 so it is profitable in some other cases as well. Especially when there
8399 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8400 and there is exactly one register to pop. This heuristic may need some
8401 tuning in future. */
8402 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8403 || (TARGET_EPILOGUE_USING_MOVE
8404 && cfun->machine->use_fast_prologue_epilogue
8405 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8406 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8407 || (frame_pointer_needed && TARGET_USE_LEAVE
8408 && cfun->machine->use_fast_prologue_epilogue
8409 && (frame.nregs + frame.nsseregs) == 1)
8410 || crtl->calls_eh_return)
8412 /* Restore registers. We can use ebp or esp to address the memory
8413 locations. If both are available, default to ebp, since offsets
8414 are known to be small. Only exception is esp pointing directly
8415 to the end of block of saved registers, where we may simplify
8418 If we are realigning stack with bp and sp, regs restore can't
8419 be addressed by bp. sp must be used instead. */
8421 if (!frame_pointer_needed
8422 || (sp_valid && !frame.to_allocate)
8423 || stack_realign_fp)
8425 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8426 frame.to_allocate, style == 2);
8427 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8429 + frame.nsseregs * 16
8430 + frame.padding0, style == 2);
8434 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8435 offset, style == 2);
8436 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8438 + frame.nsseregs * 16
8439 + frame.padding0, style == 2);
8442 /* eh_return epilogues need %ecx added to the stack pointer. */
8445 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8447 /* Stack align doesn't work with eh_return. */
8448 gcc_assert (!crtl->stack_realign_needed);
8450 if (frame_pointer_needed)
8452 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8453 tmp = plus_constant (tmp, UNITS_PER_WORD);
8454 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8456 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8457 emit_move_insn (hard_frame_pointer_rtx, tmp);
8459 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8464 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8465 tmp = plus_constant (tmp, (frame.to_allocate
8466 + frame.nregs * UNITS_PER_WORD
8467 + frame.nsseregs * 16
8469 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8472 else if (!frame_pointer_needed)
8473 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8474 GEN_INT (frame.to_allocate
8475 + frame.nregs * UNITS_PER_WORD
8476 + frame.nsseregs * 16
8479 /* If not an i386, mov & pop is faster than "leave". */
8480 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8481 || !cfun->machine->use_fast_prologue_epilogue
8482 emit_insn ((*ix86_gen_leave) ());
8485 pro_epilogue_adjust_stack (stack_pointer_rtx,
8486 hard_frame_pointer_rtx,
8489 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8494 /* First step is to deallocate the stack frame so that we can
8497 If we realign stack with frame pointer, then stack pointer
8498 won't be able to recover via lea $offset(%bp), %sp, because
8499 there is a padding area between bp and sp for realign.
8500 "add $to_allocate, %sp" must be used instead. */
8503 gcc_assert (frame_pointer_needed);
8504 gcc_assert (!stack_realign_fp);
8505 pro_epilogue_adjust_stack (stack_pointer_rtx,
8506 hard_frame_pointer_rtx,
8507 GEN_INT (offset), style);
8508 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8509 frame.to_allocate, style == 2);
8510 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8511 GEN_INT (frame.nsseregs * 16), style);
8513 else if (frame.to_allocate || frame.nsseregs)
8515 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8518 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8519 GEN_INT (frame.to_allocate
8520 + frame.nsseregs * 16
8521 + frame.padding0), style);
/* Pop the remaining saved general registers in register-number order.  */
8524 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8525 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8526 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8527 if (frame_pointer_needed)
8529 /* Leave results in shorter dependency chains on CPUs that are
8530 able to grok it fast. */
8531 if (TARGET_USE_LEAVE)
8532 emit_insn ((*ix86_gen_leave) ());
8535 /* For stack realigned really happens, recover stack
8536 pointer to hard frame pointer is a must, if not using
8538 if (stack_realign_fp)
8539 pro_epilogue_adjust_stack (stack_pointer_rtx,
8540 hard_frame_pointer_rtx,
8542 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: recover SP past the pushed return-address
   copy (and arg-pointer save, if it was callee-saved).  */
8547 if (crtl->drap_reg && crtl->stack_realign_needed)
8549 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8550 ? 0 : UNITS_PER_WORD);
8551 gcc_assert (stack_realign_drap);
8552 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8554 GEN_INT (-(UNITS_PER_WORD
8555 + param_ptr_offset))));
8556 if (!call_used_regs[REGNO (crtl->drap_reg)])
8557 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8561 /* Sibcall epilogues don't want a return instruction. */
8565 if (crtl->args.pops_args && crtl->args.size)
8567 rtx popc = GEN_INT (crtl->args.pops_args);
8569 /* i386 can only pop 64K bytes. If asked to pop more, pop
8570 return address, do explicit add, and jump indirectly to the
8573 if (crtl->args.pops_args >= 65536)
8575 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8577 /* There is no "pascal" calling convention in any 64bit ABI. */
8578 gcc_assert (!TARGET_64BIT);
8580 emit_insn (gen_popsi1 (ecx));
8581 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8582 emit_jump_insn (gen_return_indirect_internal (ecx));
8585 emit_jump_insn (gen_return_pop_internal (popc));
8588 emit_jump_insn (gen_return_internal ());
8591 /* Reset from the function's potential modifications. */
8594 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8595 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* The prologue may have switched the PIC register to an alternate hard
   register; restore the canonical one for the next function.  */
8597 if (pic_offset_table_rtx)
8598 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8600 /* Mach-O doesn't support labels at the end of objects, so if
8601 it looks like we might want one, insert a NOP. */
8603 rtx insn = get_last_insn ();
8606 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8607 insn = PREV_INSN (insn);
8611 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8612 fputs ("\tnop\n", file);
8618 /* Extract the parts of an RTL expression that is a valid memory address
8619 for an instruction. Return 0 if the structure of the address is
8620 grossly off. Return -1 if the address contains ASHIFT, so it is not
8621 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): many lines of this function are omitted from this listing
   (several PLUS/addend cases, failure returns, and the final OUT
   assignment) -- verify against the complete i386.c.  */
8624 ix86_decompose_address (rtx addr, struct ix86_address *out)
8626 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8627 rtx base_reg, index_reg;
8628 HOST_WIDE_INT scale = 1;
8629 rtx scale_rtx = NULL_RTX;
8631 enum ix86_address_seg seg = SEG_DEFAULT;
8633 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8635 else if (GET_CODE (addr) == PLUS)
/* Flatten the nested PLUS chain into an array of addends, then
   classify each addend below.  */
8645 addends[n++] = XEXP (op, 1);
8648 while (GET_CODE (op) == PLUS);
8653 for (i = n; i >= 0; --i)
8656 switch (GET_CODE (op))
8661 index = XEXP (op, 0);
8662 scale_rtx = XEXP (op, 1);
/* %fs/%gs-relative TLS reference via UNSPEC_TP.  */
8666 if (XINT (op, 1) == UNSPEC_TP
8667 && TARGET_TLS_DIRECT_SEG_REFS
8668 && seg == SEG_DEFAULT)
8669 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8698 else if (GET_CODE (addr) == MULT)
8700 index = XEXP (addr, 0); /* index*scale */
8701 scale_rtx = XEXP (addr, 1);
8703 else if (GET_CODE (addr) == ASHIFT)
8707 /* We're called for lea too, which implements ashift on occasion. */
8708 index = XEXP (addr, 0);
8709 tmp = XEXP (addr, 1);
8710 if (!CONST_INT_P (tmp))
8712 scale = INTVAL (tmp);
8713 if ((unsigned HOST_WIDE_INT) scale > 3)
8719 disp = addr; /* displacement */
8721 /* Extract the integral value of scale. */
8724 if (!CONST_INT_P (scale_rtx))
8726 scale = INTVAL (scale_rtx);
8729 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8730 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8732 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8733 if (base_reg && index_reg && scale == 1
8734 && (index_reg == arg_pointer_rtx
8735 || index_reg == frame_pointer_rtx
8736 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8739 tmp = base, base = index, index = tmp;
8740 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8743 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8744 if ((base_reg == hard_frame_pointer_rtx
8745 || base_reg == frame_pointer_rtx
8746 || base_reg == arg_pointer_rtx) && !disp)
8749 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8750 Avoid this by transforming to [%esi+0].
8751 Reload calls address legitimization without cfun defined, so we need
8752 to test cfun for being non-NULL. */
8753 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8754 && base_reg && !index_reg && !disp
8756 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8759 /* Special case: encode reg+reg instead of reg*2. */
8760 if (!base && index && scale && scale == 2)
8761 base = index, base_reg = index_reg, scale = 1;
8763 /* Special case: scaling cannot be encoded without base or displacement. */
8764 if (!base && !disp && index && scale != 1)
8776 /* Return cost of the memory address x.
8777 For i386, it is better to use a complex address than let gcc copy
8778 the address into a reg and make a new pseudo. But not if the address
8779 requires two regs - that would mean more pseudos with longer
8782 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8784 struct ix86_address parts;
8786 int ok = ix86_decompose_address (x, &parts);
/* Cost accounting works on the underlying hard/pseudo registers, so
   strip SUBREGs first.  */
8790 if (parts.base && GET_CODE (parts.base) == SUBREG)
8791 parts.base = SUBREG_REG (parts.base);
8792 if (parts.index && GET_CODE (parts.index) == SUBREG)
8793 parts.index = SUBREG_REG (parts.index);
8795 /* Attempt to minimize number of registers in the address. */
8797 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8799 && (!REG_P (parts.index)
8800 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8804 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8806 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8807 && parts.base != parts.index)
8810 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8811 since its predecode logic can't detect the length of instructions
8812 and it degenerates to vector decoded. Increase cost of such
8813 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8814 to split such addresses or even refuse such addresses at all.
8816 Following addressing modes are affected:
8821 The first and last case may be avoidable by explicitly coding the zero in
8822 memory address, but I don't have AMD-K6 machine handy to check this
8826 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8827 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8828 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8834 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8835 this is used to form addresses to local data when -fPIC is in
8839 darwin_local_data_pic (rtx disp)
/* True iff DISP is the Mach-O pic-base offset unspec.  */
8841 return (GET_CODE (disp) == UNSPEC
8842 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8845 /* Determine if a given RTX is a valid constant. We already know this
8846 satisfies CONSTANT_P. */
/* NOTE(review): the switch cases and several returns are partially
   omitted from this listing -- verify against the complete i386.c.  */
8849 legitimate_constant_p (rtx x)
8851 switch (GET_CODE (x))
8856 if (GET_CODE (x) == PLUS)
8858 if (!CONST_INT_P (XEXP (x, 1)))
8863 if (TARGET_MACHO && darwin_local_data_pic (x))
8866 /* Only some unspecs are valid as "constants". */
8867 if (GET_CODE (x) == UNSPEC)
8868 switch (XINT (x, 1))
8873 return TARGET_64BIT;
8876 x = XVECEXP (x, 0, 0);
8877 return (GET_CODE (x) == SYMBOL_REF
8878 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8880 x = XVECEXP (x, 0, 0);
8881 return (GET_CODE (x) == SYMBOL_REF
8882 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8887 /* We must have drilled down to a symbol. */
8888 if (GET_CODE (x) == LABEL_REF)
8890 if (GET_CODE (x) != SYMBOL_REF)
8895 /* TLS symbols are never valid. */
8896 if (SYMBOL_REF_TLS_MODEL (x))
8899 /* DLLIMPORT symbols are never valid. */
8900 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8901 && SYMBOL_REF_DLLIMPORT_P (x))
8906 if (GET_MODE (x) == TImode
8907 && x != CONST0_RTX (TImode)
8913 if (x == CONST0_RTX (GET_MODE (x)))
8921 /* Otherwise we handle everything else in the move patterns. */
8925 /* Determine if it's legal to put X into the constant pool. This
8926 is not possible for the address of thread-local symbols, which
8927 is checked above. */
8930 ix86_cannot_force_const_mem (rtx x)
8932 /* We can always put integral constants and vectors in memory. */
8933 switch (GET_CODE (x))
/* Everything else is forceable exactly when it is a legitimate
   constant; TLS/DLLIMPORT symbols fail legitimate_constant_p.  */
8943 return !legitimate_constant_p (x);
8946 /* Determine if a given RTX is a valid constant address. */
8949 constant_address_p (rtx x)
/* A constant address must also pass strict address validation.  */
8951 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8954 /* Nonzero if the constant value X is a legitimate general operand
8955 when generating PIC code. It is given that flag_pic is on and
8956 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8959 legitimate_pic_operand_p (rtx x)
8963 switch (GET_CODE (x))
/* For CONST wrappers, look through an optional (plus ... const_int)
   to find the underlying symbol or unspec.  */
8966 inner = XEXP (x, 0);
8967 if (GET_CODE (inner) == PLUS
8968 && CONST_INT_P (XEXP (inner, 1)))
8969 inner = XEXP (inner, 0);
8971 /* Only some unspecs are valid as "constants". */
8972 if (GET_CODE (inner) == UNSPEC)
8973 switch (XINT (inner, 1))
8978 return TARGET_64BIT;
8980 x = XVECEXP (inner, 0, 0);
8981 return (GET_CODE (x) == SYMBOL_REF
8982 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8983 case UNSPEC_MACHOPIC_OFFSET:
8984 return legitimate_pic_address_disp_p (x);
8992 return legitimate_pic_address_disp_p (x);
8999 /* Determine if a given CONST RTX is a valid memory displacement
9003 legitimate_pic_address_disp_p (rtx disp)
9007 /* In 64bit mode we can allow direct addresses of symbols and labels
9008 when they are not dynamic symbols. */
9011 rtx op0 = disp, op1;
9013 switch (GET_CODE (disp))
/* CONST of (plus symbol const_int): the offset must stay within
   +/-16MB so the linker-relaxed addressing remains valid.  */
9019 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9021 op0 = XEXP (XEXP (disp, 0), 0);
9022 op1 = XEXP (XEXP (disp, 0), 1);
9023 if (!CONST_INT_P (op1)
9024 || INTVAL (op1) >= 16*1024*1024
9025 || INTVAL (op1) < -16*1024*1024)
9027 if (GET_CODE (op0) == LABEL_REF)
9029 if (GET_CODE (op0) != SYMBOL_REF)
9034 /* TLS references should always be enclosed in UNSPEC. */
9035 if (SYMBOL_REF_TLS_MODEL (op0))
9037 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9038 && ix86_cmodel != CM_LARGE_PIC)
9046 if (GET_CODE (disp) != CONST)
9048 disp = XEXP (disp, 0);
9052 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9053 of GOT tables. We should not need these anyway. */
9054 if (GET_CODE (disp) != UNSPEC
9055 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9056 && XINT (disp, 1) != UNSPEC_GOTOFF
9057 && XINT (disp, 1) != UNSPEC_PLTOFF))
9060 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9061 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9067 if (GET_CODE (disp) == PLUS)
9069 if (!CONST_INT_P (XEXP (disp, 1)))
9071 disp = XEXP (disp, 0);
9075 if (TARGET_MACHO && darwin_local_data_pic (disp))
9078 if (GET_CODE (disp) != UNSPEC)
9081 switch (XINT (disp, 1))
9086 /* We need to check for both symbols and labels because VxWorks loads
9087 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9089 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9090 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9092 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9093 While ABI specify also 32bit relocation but we don't produce it in
9094 small PIC model at all. */
9095 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9096 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9098 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9100 case UNSPEC_GOTTPOFF:
9101 case UNSPEC_GOTNTPOFF:
9102 case UNSPEC_INDNTPOFF:
/* TLS unspecs: the wrapped symbol's TLS model must match the
   relocation kind.  */
9105 disp = XVECEXP (disp, 0, 0);
9106 return (GET_CODE (disp) == SYMBOL_REF
9107 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9109 disp = XVECEXP (disp, 0, 0);
9110 return (GET_CODE (disp) == SYMBOL_REF
9111 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9113 disp = XVECEXP (disp, 0, 0);
9114 return (GET_CODE (disp) == SYMBOL_REF
9115 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9121 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9122 memory address for an instruction. The MODE argument is the machine mode
9123 for the MEM expression that wants to use this address.
9125 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9126 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): several lines are omitted from this listing (the
   error/return labels, some REG_P branches, and the success path) --
   verify against the complete i386.c.  */
9130 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9131 rtx addr, int strict)
9133 struct ix86_address parts;
9134 rtx base, index, disp;
9135 HOST_WIDE_INT scale;
9136 const char *reason = NULL;
9137 rtx reason_rtx = NULL_RTX;
9139 if (ix86_decompose_address (addr, &parts) <= 0)
9141 reason = "decomposition failed";
9146 index = parts.index;
9148 scale = parts.scale;
9150 /* Validate base register.
9152 Don't allow SUBREG's that span more than a word here. It can lead to spill
9153 failures when the base is one word out of a two word structure, which is
9154 represented internally as a DImode int. */
9163 else if (GET_CODE (base) == SUBREG
9164 && REG_P (SUBREG_REG (base))
9165 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9167 reg = SUBREG_REG (base);
9170 reason = "base is not a register";
9174 if (GET_MODE (base) != Pmode)
9176 reason = "base is not in Pmode";
9180 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9181 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9183 reason = "base is not valid";
9188 /* Validate index register.
9190 Don't allow SUBREG's that span more than a word here -- same as above. */
9199 else if (GET_CODE (index) == SUBREG
9200 && REG_P (SUBREG_REG (index))
9201 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9203 reg = SUBREG_REG (index);
9206 reason = "index is not a register";
9210 if (GET_MODE (index) != Pmode)
9212 reason = "index is not in Pmode";
9216 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9217 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9219 reason = "index is not valid";
9224 /* Validate scale factor. */
9227 reason_rtx = GEN_INT (scale);
9230 reason = "scale without index";
9234 if (scale != 2 && scale != 4 && scale != 8)
9236 reason = "scale is not a valid multiplier";
9241 /* Validate displacement. */
9246 if (GET_CODE (disp) == CONST
9247 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9248 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9249 switch (XINT (XEXP (disp, 0), 1))
9251 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9252 used. While ABI specify also 32bit relocations, we don't produce
9253 them at all and use IP relative instead. */
9256 gcc_assert (flag_pic);
9258 goto is_legitimate_pic;
9259 reason = "64bit address unspec";
9262 case UNSPEC_GOTPCREL:
9263 gcc_assert (flag_pic);
9264 goto is_legitimate_pic;
9266 case UNSPEC_GOTTPOFF:
9267 case UNSPEC_GOTNTPOFF:
9268 case UNSPEC_INDNTPOFF:
9274 reason = "invalid address unspec";
9278 else if (SYMBOLIC_CONST (disp)
9282 && MACHOPIC_INDIRECT
9283 && !machopic_operand_p (disp)
9289 if (TARGET_64BIT && (index || base))
9291 /* foo@dtpoff(%rX) is ok. */
9292 if (GET_CODE (disp) != CONST
9293 || GET_CODE (XEXP (disp, 0)) != PLUS
9294 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9295 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9296 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9297 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9299 reason = "non-constant pic memory reference";
9303 else if (! legitimate_pic_address_disp_p (disp))
9305 reason = "displacement is an invalid pic construct";
9309 /* This code used to verify that a symbolic pic displacement
9310 includes the pic_offset_table_rtx register.
9312 While this is good idea, unfortunately these constructs may
9313 be created by "adds using lea" optimization for incorrect
9322 This code is nonsensical, but results in addressing
9323 GOT table with pic_offset_table_rtx base. We can't
9324 just refuse it easily, since it gets matched by
9325 "addsi3" pattern, that later gets split to lea in the
9326 case output register differs from input. While this
9327 can be handled by separate addsi pattern for this case
9328 that never results in lea, this seems to be easier and
9329 correct fix for crash to disable this test. */
9331 else if (GET_CODE (disp) != LABEL_REF
9332 && !CONST_INT_P (disp)
9333 && (GET_CODE (disp) != CONST
9334 || !legitimate_constant_p (disp))
9335 && (GET_CODE (disp) != SYMBOL_REF
9336 || !legitimate_constant_p (disp)))
9338 reason = "displacement is not constant";
9341 else if (TARGET_64BIT
9342 && !x86_64_immediate_operand (disp, VOIDmode))
9344 reason = "displacement is out of range";
9349 /* Everything looks valid. */
9356 /* Return a unique alias set for the GOT. */
9358 static alias_set_type
9359 ix86_GOT_alias_set (void)
/* Lazily create a dedicated alias set for GOT loads and cache it in a
   function-local static.  NOTE(review): this extract is elided (the
   embedded original line numbers are non-contiguous), so the first-call
   guard and the return statement are not visible here — confirm against
   the full source.  */
9361 static alias_set_type set = -1;
9363 set = new_alias_set ();
9367 /* Return a legitimate reference for ORIG (an address) using the
9368 register REG. If REG is 0, a new pseudo is generated.
9370 There are two types of references that must be handled:
9372 1. Global data references must load the address from the GOT, via
9373 the PIC reg. An insn is emitted to do this load, and the reg is
9376 2. Static data references, constant pool addresses, and code labels
9377 compute the address as an offset from the GOT, whose base is in
9378 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9379 differentiate them from global data objects. The returned
9380 address is the PIC reg + an unspec constant.
9382 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9383 reg also appears in the address. */
9386 legitimize_pic_address (rtx orig, rtx reg)
/* NOTE(review): this extract is elided (embedded original line numbers
   jump), so variable declarations, braces, several else-arms and the
   final return are not visible.  The visible structure is a cascade of
   cases on the shape of the address: Mach-O, 64-bit @GOTOFF,
   32-bit @GOTOFF, plain symbols via @GOTPCREL or @GOT, constants, and
   PLUS expressions handled recursively.  */
9393 if (TARGET_MACHO && !TARGET_64BIT)
9396 reg = gen_reg_rtx (Pmode);
9397 /* Use the generic Mach-O PIC machinery. */
9398 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9402 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9404 else if (TARGET_64BIT
9405 && ix86_cmodel != CM_SMALL_PIC
9406 && gotoff_operand (addr, Pmode))
9409 /* This symbol may be referenced via a displacement from the PIC
9410 base address (@GOTOFF). */
9412 if (reload_in_progress)
9413 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9414 if (GET_CODE (addr) == CONST)
9415 addr = XEXP (addr, 0);
9416 if (GET_CODE (addr) == PLUS)
9418 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9420 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9423 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9424 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9426 tmpreg = gen_reg_rtx (Pmode);
9429 emit_move_insn (tmpreg, new_rtx);
9433 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9434 tmpreg, 1, OPTAB_DIRECT);
9437 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9439 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9441 /* This symbol may be referenced via a displacement from the PIC
9442 base address (@GOTOFF). */
9444 if (reload_in_progress)
9445 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9446 if (GET_CODE (addr) == CONST)
9447 addr = XEXP (addr, 0);
9448 if (GET_CODE (addr) == PLUS)
9450 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9452 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9455 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9456 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9457 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9461 emit_move_insn (reg, new_rtx);
9465 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9466 /* We can't use @GOTOFF for text labels on VxWorks;
9467 see gotoff_operand. */
9468 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9470 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9472 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9473 return legitimize_dllimport_symbol (addr, true);
9474 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9475 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9476 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9478 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9479 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: address the symbol PC-relatively through the
   GOT (@GOTPCREL) and load it with a const mem in the GOT alias set.  */
9483 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9485 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9486 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9487 new_rtx = gen_const_mem (Pmode, new_rtx);
9488 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9491 reg = gen_reg_rtx (Pmode);
9492 /* Use directly gen_movsi, otherwise the address is loaded
9493 into register for CSE. We don't want to CSE this addresses,
9494 instead we CSE addresses from the GOT table, so skip this. */
9495 emit_insn (gen_movsi (reg, new_rtx));
9500 /* This symbol must be referenced via a load from the
9501 Global Offset Table (@GOT). */
9503 if (reload_in_progress)
9504 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9505 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9508 new_rtx = force_reg (Pmode, new_rtx);
9509 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9510 new_rtx = gen_const_mem (Pmode, new_rtx);
9511 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9514 reg = gen_reg_rtx (Pmode);
9515 emit_move_insn (reg, new_rtx);
/* Constant addresses: a CONST_INT too wide for a 64-bit immediate must
   be moved into a register rather than used in-place.  */
9521 if (CONST_INT_P (addr)
9522 && !x86_64_immediate_operand (addr, VOIDmode))
9526 emit_move_insn (reg, addr);
9530 new_rtx = force_reg (Pmode, addr);
9532 else if (GET_CODE (addr) == CONST)
9534 addr = XEXP (addr, 0);
9536 /* We must match stuff we generate before. Assume the only
9537 unspecs that can get here are ours. Not that we could do
9538 anything with them anyway.... */
9539 if (GET_CODE (addr) == UNSPEC
9540 || (GET_CODE (addr) == PLUS
9541 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9543 gcc_assert (GET_CODE (addr) == PLUS);
9545 if (GET_CODE (addr) == PLUS)
9547 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9549 /* Check first to see if this is a constant offset from a @GOTOFF
9550 symbol reference. */
9551 if (gotoff_operand (op0, Pmode)
9552 && CONST_INT_P (op1))
9556 if (reload_in_progress)
9557 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9558 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9560 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9561 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9562 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9566 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB (and anything not a valid 64-bit immediate)
   are forced into registers before forming the PLUS.  */
9572 if (INTVAL (op1) < -16*1024*1024
9573 || INTVAL (op1) >= 16*1024*1024)
9575 if (!x86_64_immediate_operand (op1, Pmode))
9576 op1 = force_reg (Pmode, op1);
9577 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively, then re-fold any
   constant part so it ends up outermost.  */
9583 base = legitimize_pic_address (XEXP (addr, 0), reg);
9584 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9585 base == reg ? NULL_RTX : reg);
9587 if (CONST_INT_P (new_rtx))
9588 new_rtx = plus_constant (base, INTVAL (new_rtx));
9591 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9593 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9594 new_rtx = XEXP (new_rtx, 1);
9596 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9604 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9607 get_thread_pointer (int to_reg)
/* Build an UNSPEC_TP rtx representing the TLS thread pointer, and emit
   a SET of a fresh pseudo when a register copy is wanted.
   NOTE(review): lines are elided in this extract (original numbering
   jumps), so the TO_REG test and the return statements are not visible
   here — confirm against the full source.  */
9611 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9615 reg = gen_reg_rtx (Pmode);
9616 insn = gen_rtx_SET (VOIDmode, reg, tp);
9617 insn = emit_insn (insn);
9622 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9623 false if we expect this to be used for a memory address and true if
9624 we expect to load the address into a register. */
9627 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
/* Expand the TLS reference X according to MODEL (global-dynamic,
   local-dynamic, initial-exec, local-exec), emitting the calls/loads
   each model requires.  NOTE(review): this extract is elided — the
   switch head, several braces/labels and the return statements are not
   visible; comments below describe only the visible lines.  */
9629 rtx dest, base, off, pic, tp;
9634 case TLS_MODEL_GLOBAL_DYNAMIC:
9635 dest = gen_reg_rtx (Pmode);
9636 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: call __tls_get_addr (result in %rax) wrapped in a
   libcall block so the equivalence with X is recorded.  */
9638 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9640 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9643 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9644 insns = get_insns ();
9647 RTL_CONST_CALL_P (insns) = 1;
9648 emit_libcall_block (insns, dest, rax, x);
9650 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9651 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9653 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC) returns an offset; add the thread pointer and record
   the REG_EQUIV note.  */
9655 if (TARGET_GNU2_TLS)
9657 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9659 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9663 case TLS_MODEL_LOCAL_DYNAMIC:
9664 base = gen_reg_rtx (Pmode);
9665 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9667 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9669 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9672 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9673 insns = get_insns ();
9676 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9677 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9678 RTL_CONST_CALL_P (insns) = 1;
9679 emit_libcall_block (insns, base, rax, note);
9681 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9682 emit_insn (gen_tls_local_dynamic_base_64 (base));
9684 emit_insn (gen_tls_local_dynamic_base_32 (base));
9686 if (TARGET_GNU2_TLS)
9688 rtx x = ix86_tls_module_base ();
9690 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9691 gen_rtx_MINUS (Pmode, x, tp));
/* LD: final address is module base plus the symbol's @DTPOFF offset.  */
9694 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9695 off = gen_rtx_CONST (Pmode, off);
9697 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9699 if (TARGET_GNU2_TLS)
9701 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9703 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9708 case TLS_MODEL_INITIAL_EXEC:
9712 type = UNSPEC_GOTNTPOFF;
/* IE: pick the GOT-relative relocation flavour based on target/PIC
   state, then load the TP offset from the GOT.  */
9716 if (reload_in_progress)
9717 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9718 pic = pic_offset_table_rtx;
9719 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9721 else if (!TARGET_ANY_GNU_TLS)
9723 pic = gen_reg_rtx (Pmode);
9724 emit_insn (gen_set_got (pic));
9725 type = UNSPEC_GOTTPOFF;
9730 type = UNSPEC_INDNTPOFF;
9733 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9734 off = gen_rtx_CONST (Pmode, off);
9736 off = gen_rtx_PLUS (Pmode, pic, off);
9737 off = gen_const_mem (Pmode, off);
9738 set_mem_alias_set (off, ix86_GOT_alias_set ());
9740 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9742 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9743 off = force_reg (Pmode, off);
9744 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style TLS: subtract the offset from the thread pointer.  */
9748 base = get_thread_pointer (true);
9749 dest = gen_reg_rtx (Pmode);
9750 emit_insn (gen_subsi3 (dest, base, off));
9754 case TLS_MODEL_LOCAL_EXEC:
9755 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9756 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9757 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9758 off = gen_rtx_CONST (Pmode, off);
9760 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9762 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9763 return gen_rtx_PLUS (Pmode, base, off);
9767 base = get_thread_pointer (true);
9768 dest = gen_reg_rtx (Pmode);
9769 emit_insn (gen_subsi3 (dest, base, off));
9780 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9783 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9784 htab_t dllimport_map;
9787 get_dllimport_decl (tree decl)
/* Look DECL up in the GC-managed dllimport_map hash table; on a miss,
   create an artificial read-only external VAR_DECL whose assembler name
   is "__imp_" (or "__imp__") + DECL's stripped name, and whose DECL_RTL
   is a const mem in the GOT alias set.  NOTE(review): this extract is
   elided — the early "found" return and some declarations are not
   visible here.  */
9789 struct tree_map *h, in;
9793 size_t namelen, prefixlen;
/* Create the map lazily on first use.  */
9799 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9801 in.hash = htab_hash_pointer (decl);
9802 in.base.from = decl;
9803 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9804 h = (struct tree_map *) *loc;
9808 *loc = h = GGC_NEW (struct tree_map);
9810 h->base.from = decl;
9811 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9812 DECL_ARTIFICIAL (to) = 1;
9813 DECL_IGNORED_P (to) = 1;
9814 DECL_EXTERNAL (to) = 1;
9815 TREE_READONLY (to) = 1;
/* Fastcall names and targets with no user label prefix get "__imp_";
   otherwise the extra leading underscore is included ("__imp__").  */
9817 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9818 name = targetm.strip_name_encoding (name);
9819 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9820 ? "*__imp_" : "*__imp__";
9821 namelen = strlen (name);
9822 prefixlen = strlen (prefix);
9823 imp_name = (char *) alloca (namelen + prefixlen + 1);
9824 memcpy (imp_name, prefix, prefixlen);
9825 memcpy (imp_name + prefixlen, name, namelen + 1);
9827 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9828 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9829 SET_SYMBOL_REF_DECL (rtl, to);
9830 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9832 rtl = gen_const_mem (Pmode, rtl);
9833 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9835 SET_DECL_RTL (to, rtl);
9836 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9841 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9842 true if we require the result be a register. */
9845 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Fetch (or build) the __imp_DECL placeholder for SYMBOL's decl and
   return its DECL_RTL, forced into a register when the caller asks.
   NOTE(review): intermediate lines (declarations, the WANT_REG test and
   the return) are elided from this extract.  */
9850 gcc_assert (SYMBOL_REF_DECL (symbol));
9851 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9853 x = DECL_RTL (imp_decl);
9855 x = force_reg (Pmode, x);
9859 /* Try machine-dependent ways of modifying an illegitimate address
9860 to be legitimate. If we find one, return the new, valid address.
9861 This macro is used in only one place: `memory_address' in explow.c.
9863 OLDX is the address as it was before break_out_memory_refs was called.
9864 In some cases it is useful to look at this to decide what needs to be done.
9866 MODE and WIN are passed so that this macro can use
9867 GO_IF_LEGITIMATE_ADDRESS.
9869 It is always safe for this macro to do nothing. It exists to recognize
9870 opportunities to optimize the output.
9872 For the 80386, we handle X+REG by loading X into a register R and
9873 using R+REG. R will go in a general reg and indexing will be used.
9874 However, if REG is a broken-out memory address or multiplication,
9875 nothing needs to be done because REG can certainly go in a general reg.
9877 When -fpic is used, special handling is needed for symbolic references.
9878 See comments by legitimize_pic_address in i386.c for details. */
9881 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* NOTE(review): this extract is elided (braces, some declarations and
   early returns missing).  The visible flow: TLS symbols go to
   legitimize_tls_address, dllimport symbols to
   legitimize_dllimport_symbol, symbolic PIC references to
   legitimize_pic_address; then shift/plus forms are canonicalized
   toward base+index*scale+disp.  */
9886 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9888 return legitimize_tls_address (x, (enum tls_model) log, false);
9889 if (GET_CODE (x) == CONST
9890 && GET_CODE (XEXP (x, 0)) == PLUS
9891 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9892 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9894 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9895 (enum tls_model) log, false);
9896 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9899 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9901 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9902 return legitimize_dllimport_symbol (x, true);
9903 if (GET_CODE (x) == CONST
9904 && GET_CODE (XEXP (x, 0)) == PLUS
9905 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9906 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9908 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9909 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9913 if (flag_pic && SYMBOLIC_CONST (x))
9914 return legitimize_pic_address (x, 0);
9916 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9917 if (GET_CODE (x) == ASHIFT
9918 && CONST_INT_P (XEXP (x, 1))
9919 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9922 log = INTVAL (XEXP (x, 1));
9923 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9924 GEN_INT (1 << log));
9927 if (GET_CODE (x) == PLUS)
9929 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9931 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9932 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9933 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9936 log = INTVAL (XEXP (XEXP (x, 0), 1));
9937 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9938 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9939 GEN_INT (1 << log));
9942 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9943 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9944 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9947 log = INTVAL (XEXP (XEXP (x, 1), 1));
9948 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9949 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9950 GEN_INT (1 << log));
9953 /* Put multiply first if it isn't already. */
9954 if (GET_CODE (XEXP (x, 1)) == MULT)
9956 rtx tmp = XEXP (x, 0);
9957 XEXP (x, 0) = XEXP (x, 1);
9962 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9963 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9964 created by virtual register instantiation, register elimination, and
9965 similar optimizations. */
9966 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9969 x = gen_rtx_PLUS (Pmode,
9970 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9971 XEXP (XEXP (x, 1), 0)),
9972 XEXP (XEXP (x, 1), 1));
9976 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9977 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9978 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9980 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9981 && CONSTANT_P (XEXP (x, 1)))
9984 rtx other = NULL_RTX;
9986 if (CONST_INT_P (XEXP (x, 1)))
9988 constant = XEXP (x, 1);
9989 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9991 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9993 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9994 other = XEXP (x, 1);
10002 x = gen_rtx_PLUS (Pmode,
10003 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10004 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10005 plus_constant (other, INTVAL (constant)));
10009 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force MULT sub-expressions into registers so they can serve as the
   index term of a base+index address.  */
10012 if (GET_CODE (XEXP (x, 0)) == MULT)
10015 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10018 if (GET_CODE (XEXP (x, 1)) == MULT)
10021 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10025 && REG_P (XEXP (x, 1))
10026 && REG_P (XEXP (x, 0)))
10029 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10032 x = legitimize_pic_address (x, 0);
10035 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: load the non-register half into a temporary.  */
10038 if (REG_P (XEXP (x, 0)))
10040 rtx temp = gen_reg_rtx (Pmode);
10041 rtx val = force_operand (XEXP (x, 1), temp);
10043 emit_move_insn (temp, val);
10045 XEXP (x, 1) = temp;
10049 else if (REG_P (XEXP (x, 1)))
10051 rtx temp = gen_reg_rtx (Pmode);
10052 rtx val = force_operand (XEXP (x, 0), temp);
10054 emit_move_insn (temp, val);
10056 XEXP (x, 0) = temp;
10064 /* Print an integer constant expression in assembler syntax. Addition
10065 and subtraction are the only arithmetic that may appear in these
10066 expressions. FILE is the stdio stream to write to, X is the rtx, and
10067 CODE is the operand print code from the output string. */
10070 output_pic_addr_const (FILE *file, rtx x, int code)
/* Recursive printer over the constant-expression rtx X: SYMBOL_REF,
   LABEL_REF/CODE_LABEL, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS and
   UNSPEC (relocation suffixes) are handled.  NOTE(review): this extract
   is elided — several case labels, braces and break statements are not
   visible; comments describe only the visible lines.  */
10074 switch (GET_CODE (x))
10077 gcc_assert (flag_pic);
10082 if (! TARGET_MACHO || TARGET_64BIT)
10083 output_addr_const (file, x);
10086 const char *name = XSTR (x, 0);
10088 /* Mark the decl as referenced so that cgraph will
10089 output the function. */
10090 if (SYMBOL_REF_DECL (x))
10091 mark_decl_referenced (SYMBOL_REF_DECL (x));
10094 if (MACHOPIC_INDIRECT
10095 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10096 name = machopic_indirection_name (x, /*stub_p=*/true);
10098 assemble_name (file, name);
10100 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10101 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10102 fputs ("@PLT", file);
10109 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10110 assemble_name (asm_out_file, buf);
10114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10118 /* This used to output parentheses around the expression,
10119 but that does not work on the 386 (either ATT or BSD assembler). */
10120 output_pic_addr_const (file, XEXP (x, 0), code);
10124 if (GET_MODE (x) == VOIDmode)
10126 /* We can use %d if the number is <32 bits and positive. */
10127 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10128 fprintf (file, "0x%lx%08lx",
10129 (unsigned long) CONST_DOUBLE_HIGH (x),
10130 (unsigned long) CONST_DOUBLE_LOW (x));
10132 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10135 /* We can't handle floating point constants;
10136 PRINT_OPERAND must handle them. */
10137 output_operand_lossage ("floating constant misused");
10141 /* Some assemblers need integer constants to appear first. */
10142 if (CONST_INT_P (XEXP (x, 0)))
10144 output_pic_addr_const (file, XEXP (x, 0), code);
10146 output_pic_addr_const (file, XEXP (x, 1), code);
10150 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10151 output_pic_addr_const (file, XEXP (x, 1), code);
10153 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: Intel dialect brackets with parens, AT&T with square
   brackets.  */
10159 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10160 output_pic_addr_const (file, XEXP (x, 0), code);
10162 output_pic_addr_const (file, XEXP (x, 1), code);
10164 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   suffix selected by the unspec number.  */
10168 gcc_assert (XVECLEN (x, 0) == 1);
10169 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10170 switch (XINT (x, 1))
10173 fputs ("@GOT", file);
10175 case UNSPEC_GOTOFF:
10176 fputs ("@GOTOFF", file);
10178 case UNSPEC_PLTOFF:
10179 fputs ("@PLTOFF", file);
10181 case UNSPEC_GOTPCREL:
10182 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10183 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10185 case UNSPEC_GOTTPOFF:
10186 /* FIXME: This might be @TPOFF in Sun ld too. */
10187 fputs ("@GOTTPOFF", file);
10190 fputs ("@TPOFF", file);
10192 case UNSPEC_NTPOFF:
10194 fputs ("@TPOFF", file);
10196 fputs ("@NTPOFF", file);
10198 case UNSPEC_DTPOFF:
10199 fputs ("@DTPOFF", file);
10201 case UNSPEC_GOTNTPOFF:
10203 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10204 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10206 fputs ("@GOTNTPOFF", file);
10208 case UNSPEC_INDNTPOFF:
10209 fputs ("@INDNTPOFF", file);
10212 case UNSPEC_MACHOPIC_OFFSET:
10214 machopic_output_function_base_name (file);
10218 output_operand_lossage ("invalid UNSPEC as operand");
10224 output_operand_lossage ("invalid expression as operand");
10228 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10229 We need to emit DTP-relative relocations. */
10231 static void ATTRIBUTE_UNUSED
10232 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the constant X with an @DTPOFF relocation.  NOTE(review): the
   switch on SIZE that selects the directive (and the case labels around
   the ", 0" padding and gcc_unreachable) is elided from this extract —
   confirm against the full source.  */
10234 fputs (ASM_LONG, file);
10235 output_addr_const (file, x);
10236 fputs ("@DTPOFF", file);
10242 fputs (", 0", file);
10245 gcc_unreachable ();
10249 /* Return true if X is a representation of the PIC register. This copes
10250 with calls from ix86_find_base_term, where the register might have
10251 been replaced by a cselib value. */
10254 ix86_pic_register_p (rtx x)
/* A cselib VALUE is compared against pic_offset_table_rtx; anything
   else must be a REG with the PIC register number.  */
10256 if (GET_CODE (x) == VALUE)
10257 return (pic_offset_table_rtx
10258 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10260 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10263 /* In the name of slightly smaller debug output, and to cater to
10264 general assembler lossage, recognize PIC+GOTOFF and turn it back
10265 into a direct symbol reference.
10267 On Darwin, this is necessary to avoid a crash, because Darwin
10268 has a different PIC label for each routine but the DWARF debugging
10269 information is not associated with any particular routine, so it's
10270 necessary to remove references to the PIC label from RTL stored by
10271 the DWARF output code. */
10274 ix86_delegitimize_address (rtx orig_x)
/* Strip the PIC register and GOT/GOTOFF unspec wrapping from ORIG_X,
   returning the underlying symbol plus any register and constant
   addends.  NOTE(review): this extract is elided — the initial
   assignment of x from orig_x, the TARGET_64BIT branch head, and some
   early returns are not visible here.  */
10277 /* reg_addend is NULL or a multiple of some register. */
10278 rtx reg_addend = NULL_RTX;
10279 /* const_addend is NULL or a const_int. */
10280 rtx const_addend = NULL_RTX;
10281 /* This is the result, or NULL. */
10282 rtx result = NULL_RTX;
/* 64-bit: only a (mem (const (unspec GOTPCREL))) form is unwrapped.  */
10289 if (GET_CODE (x) != CONST
10290 || GET_CODE (XEXP (x, 0)) != UNSPEC
10291 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10292 || !MEM_P (orig_x))
10294 return XVECEXP (XEXP (x, 0), 0, 0);
10297 if (GET_CODE (x) != PLUS
10298 || GET_CODE (XEXP (x, 1)) != CONST)
10301 if (ix86_pic_register_p (XEXP (x, 0)))
10302 /* %ebx + GOT/GOTOFF */
10304 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10306 /* %ebx + %reg * scale + GOT/GOTOFF */
10307 reg_addend = XEXP (x, 0);
10308 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10309 reg_addend = XEXP (reg_addend, 1);
10310 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10311 reg_addend = XEXP (reg_addend, 0);
10314 if (!REG_P (reg_addend)
10315 && GET_CODE (reg_addend) != MULT
10316 && GET_CODE (reg_addend) != ASHIFT)
10322 x = XEXP (XEXP (x, 1), 0);
10323 if (GET_CODE (x) == PLUS
10324 && CONST_INT_P (XEXP (x, 1)))
10326 const_addend = XEXP (x, 1);
10330 if (GET_CODE (x) == UNSPEC
10331 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10332 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10333 result = XVECEXP (x, 0, 0);
10335 if (TARGET_MACHO && darwin_local_data_pic (x)
10336 && !MEM_P (orig_x))
10337 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends.  */
10343 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10345 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10349 /* If X is a machine specific address (i.e. a symbol or label being
10350 referenced as a displacement from the GOT implemented using an
10351 UNSPEC), then return the base term. Otherwise return X. */
10354 ix86_find_base_term (rtx x)
/* Unwrap a CONST / PLUS-constant wrapper around an UNSPEC_GOTPCREL and
   return its operand; otherwise fall back to
   ix86_delegitimize_address.  NOTE(review): a guard around the GOTPCREL
   path (original lines 10355-10359) is elided from this extract.  */
10360 if (GET_CODE (x) != CONST)
10362 term = XEXP (x, 0);
10363 if (GET_CODE (term) == PLUS
10364 && (CONST_INT_P (XEXP (term, 1))
10365 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10366 term = XEXP (term, 0);
10367 if (GET_CODE (term) != UNSPEC
10368 || XINT (term, 1) != UNSPEC_GOTPCREL)
10371 return XVECEXP (term, 0, 0);
10374 return ix86_delegitimize_address (x);
10378 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10379 int fp, FILE *file)
/* Write the condition-code suffix (e.g. "a", "ge", "np") for CODE in
   flag mode MODE to FILE; REVERSE inverts the condition and FP selects
   the fcmov-style spellings.  NOTE(review): the extract is elided — the
   switch head and most case labels between lines 10392 and 10443 are
   not visible, so the suffix-to-code mapping below is partial.  */
10381 const char *suffix;
/* FP compares are first mapped onto the integer condition codes.  */
10383 if (mode == CCFPmode || mode == CCFPUmode)
10385 enum rtx_code second_code, bypass_code;
10386 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10387 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10388 code = ix86_fp_compare_code_to_integer (code);
10392 code = reverse_condition (code);
10443 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10447 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10448 Those same assemblers have the same but opposite lossage on cmov. */
10449 if (mode == CCmode)
10450 suffix = fp ? "nbe" : "a";
10451 else if (mode == CCCmode)
10454 gcc_unreachable ();
10470 gcc_unreachable ();
10474 gcc_assert (mode == CCmode || mode == CCCmode);
10491 gcc_unreachable ();
10495 /* ??? As above. */
10496 gcc_assert (mode == CCmode || mode == CCCmode);
10497 suffix = fp ? "nb" : "ae";
10500 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10504 /* ??? As above. */
10505 if (mode == CCmode)
10507 else if (mode == CCCmode)
10508 suffix = fp ? "nb" : "ae";
10510 gcc_unreachable ();
10513 suffix = fp ? "u" : "p";
10516 suffix = fp ? "nu" : "np";
10519 gcc_unreachable ();
10521 fputs (suffix, file);
10524 /* Print the name of register X to FILE based on its machine mode and number.
10525 If CODE is 'w', pretend the mode is HImode.
10526 If CODE is 'b', pretend the mode is QImode.
10527 If CODE is 'k', pretend the mode is SImode.
10528 If CODE is 'q', pretend the mode is DImode.
10529 If CODE is 'x', pretend the mode is V4SFmode.
10530 If CODE is 't', pretend the mode is V8SFmode.
10531 If CODE is 'h', pretend the reg is the 'high' byte register.
10532 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10533 If CODE is 'd', duplicate the operand for AVX instruction.
10537 print_reg (rtx x, int code, FILE *file)
/* NOTE(review): this extract is elided — the case labels of the size
   switch and the size constants assigned for each CODE letter are not
   all visible; comments describe only the visible lines.  CODE is
   first normalized to an operand size, then the register name is
   printed in the current assembler dialect.  */
10540 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal registers (arg/frame pointer, flags, FP status/control)
   must never reach the assembler printer.  */
10542 gcc_assert (x == pc_rtx
10543 || (REGNO (x) != ARG_POINTER_REGNUM
10544 && REGNO (x) != FRAME_POINTER_REGNUM
10545 && REGNO (x) != FLAGS_REG
10546 && REGNO (x) != FPSR_REG
10547 && REGNO (x) != FPCR_REG));
10549 if (ASSEMBLER_DIALECT == ASM_ATT)
10554 gcc_assert (TARGET_64BIT);
10555 fputs ("rip", file);
10559 if (code == 'w' || MMX_REG_P (x))
10561 else if (code == 'b')
10563 else if (code == 'k')
10565 else if (code == 'q')
10567 else if (code == 'y')
10569 else if (code == 'h')
10571 else if (code == 'x')
10573 else if (code == 't')
10576 code = GET_MODE_SIZE (GET_MODE (x));
10578 /* Irritatingly, AMD extended registers use different naming convention
10579 from the normal registers. */
10580 if (REX_INT_REG_P (x))
10582 gcc_assert (TARGET_64BIT);
10586 error ("extended registers have no high halves");
10589 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10592 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10595 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10598 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10601 error ("unsupported operand size for extended register");
10611 if (STACK_TOP_P (x))
/* Non-FP registers of word size get the 'e'/'r' size prefix.  */
10620 if (! ANY_FP_REG_P (x))
10621 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10626 reg = hi_reg_name[REGNO (x)];
10629 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10631 reg = qi_reg_name[REGNO (x)];
10634 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10636 reg = qi_high_reg_name[REGNO (x)];
10641 gcc_assert (!duplicated);
10643 fputs (hi_reg_name[REGNO (x)] + 1, file);
10648 gcc_unreachable ();
/* AVX 'd': repeat the operand, comma-separated, per dialect.  */
10654 if (ASSEMBLER_DIALECT == ASM_ATT)
10655 fprintf (file, ", %%%s", reg);
10657 fprintf (file, ", %s", reg);
10661 /* Locate some local-dynamic symbol still in use by this function
10662 so that we can print its name in some tls_local_dynamic_base
10666 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
/* for_each_rtx callback: when *PX is a SYMBOL_REF with local-dynamic
   TLS model, record its name in cfun->machine->some_ld_name.
   NOTE(review): the dereference of PX and the return values that stop
   or continue the walk are in elided lines of this extract.  */
10670 if (GET_CODE (x) == SYMBOL_REF
10671 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10673 cfun->machine->some_ld_name = XSTR (x, 0);
10680 static const char *
10681 get_some_local_dynamic_name (void)
/* Return a local-dynamic TLS symbol name used in the current function,
   preferring the cached value; otherwise walk the insn stream with
   for_each_rtx/get_some_local_dynamic_name_1.  Asserts (unreachable) if
   no such symbol exists — callers only invoke this when one must.
   NOTE(review): the insn declaration and the INSN_P guard on line 10689
   are elided from this extract.  */
10685 if (cfun->machine->some_ld_name)
10686 return cfun->machine->some_ld_name;
10688 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10690 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10691 return cfun->machine->some_ld_name;
10693 gcc_unreachable ();
/* NOTE(review): this is an elided listing -- the embedded source line
   numbers jump, so case labels, braces and many statements are missing
   between entries.  Comments below describe only what the visible lines
   establish; confirm anything structural against the full source.  */
10696 /* Meaning of CODE:
10697 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10698 C -- print opcode suffix for set/cmov insn.
10699 c -- like C, but print reversed condition
10700 E,e -- likewise, but for compare-and-branch fused insn.
10701 F,f -- likewise, but for floating-point.
10702 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10704 R -- print the prefix for register names.
10705 z -- print the opcode suffix for the size of the current operand.
10706 * -- print a star (in certain assembler syntax)
10707 A -- print an absolute memory reference.
10708 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10709 s -- print a shift double count, followed by the assembler's argument
10711 b -- print the QImode name of the register for the indicated operand.
10712 %b0 would print %al if operands[0] is reg 0.
10713 w -- likewise, print the HImode name of the register.
10714 k -- likewise, print the SImode name of the register.
10715 q -- likewise, print the DImode name of the register.
10716 x -- likewise, print the V4SFmode name of the register.
10717 t -- likewise, print the V8SFmode name of the register.
10718 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10719 y -- print "st(0)" instead of "st" as a register.
10720 d -- print duplicated register operand for AVX instruction.
10721 D -- print condition for SSE cmp instruction.
10722 P -- if PIC, print an @PLT suffix.
10723 X -- don't print any sort of PIC '@' suffix for a symbol.
10724 & -- print some in-use local-dynamic symbol name.
10725 H -- print a memory address offset by 8; used for sse high-parts
10726 Y -- print condition for SSE5 com* instruction.
10727 + -- print a branch hint as 'cs' or 'ds' prefix
10728 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Print operand X to FILE according to the template letter CODE (see the
   table above).  Registers, memory references, float constants and
   immediates each take a distinct branch near the bottom.  */
10732 print_operand (FILE *file, rtx x, int code)
10739 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit the name of some in-use local-dynamic TLS symbol.  */
10744 assemble_name (file, get_some_local_dynamic_name ());
10748 switch (ASSEMBLER_DIALECT)
10755 /* Intel syntax.  For absolute addresses, registers should not
10756 be surrounded by braces. */
10760 PRINT_OPERAND (file, x, 0);
10767 gcc_unreachable ();
10770 PRINT_OPERAND (file, x, 0);
/* The following ASM_ATT checks guard dialect-specific prefix characters
   (elided here) for the size/register-name codes.  */
10775 if (ASSEMBLER_DIALECT == ASM_ATT)
10780 if (ASSEMBLER_DIALECT == ASM_ATT)
10785 if (ASSEMBLER_DIALECT == ASM_ATT)
10790 if (ASSEMBLER_DIALECT == ASM_ATT)
10795 if (ASSEMBLER_DIALECT == ASM_ATT)
10800 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': derive the opcode suffix from the operand's mode size.  */
10805 /* 387 opcodes don't get size suffixes if the operands are
10807 if (STACK_REG_P (x))
10810 /* Likewise if using Intel opcodes. */
10811 if (ASSEMBLER_DIALECT == ASM_INTEL)
10814 /* This is the size of op from size of operand. */
10815 switch (GET_MODE_SIZE (GET_MODE (x)))
10824 #ifdef HAVE_GAS_FILDS_FISTS
10834 if (GET_MODE (x) == SFmode)
10849 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10853 #ifdef GAS_MNEMONICS
10868 gcc_unreachable ();
/* 's': shift-double count; print the operand plus ", " unless the
   assembler omits an implicit %cl count.  */
10885 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10887 PRINT_OPERAND (file, x, 0);
10888 fputs (", ", file);
10893 /* 'D': the SSE compare instructions use completely different
10894 names for the comparisons than the fp conditional moves
10895 do. */
10898 switch (GET_CODE (x))
10901 fputs ("eq", file);
10904 fputs ("eq_us", file);
10907 fputs ("lt", file);
10910 fputs ("nge", file);
10913 fputs ("le", file);
10916 fputs ("ngt", file);
10919 fputs ("unord", file);
10922 fputs ("neq", file);
10925 fputs ("neq_oq", file);
10928 fputs ("ge", file);
10931 fputs ("nlt", file);
10934 fputs ("gt", file);
10937 fputs ("nle", file);
10940 fputs ("ord", file);
10943 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* NOTE(review): this second condition-name switch appears to belong to a
   different operand code, yet the lossage message below still says 'D' --
   looks like copy/paste; confirm against the full source.  */
10949 switch (GET_CODE (x))
10953 fputs ("eq", file);
10957 fputs ("lt", file);
10961 fputs ("le", file);
10964 fputs ("unord", file);
10968 fputs ("neq", file);
10972 fputs ("nlt", file);
10976 fputs ("nle", file);
10979 fputs ("ord", file);
10982 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun assembler wants an explicit size letter on cmov.  */
10988 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10989 if (ASSEMBLER_DIALECT == ASM_ATT)
10991 switch (GET_MODE (x))
10993 case HImode: putc ('w', file); break;
10995 case SFmode: putc ('l', file); break;
10997 case DFmode: putc ('q', file); break;
10998 default: gcc_unreachable ();
/* 'C'/'F' and lowercase variants: print a condition-code suffix;
   the (reverse, fp) flags to put_condition_code distinguish them.  */
11005 if (!COMPARISON_P (x))
11007 output_operand_lossage ("operand is neither a constant nor a "
11008 "condition code, invalid operand code "
11012 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11015 if (!COMPARISON_P (x))
11017 output_operand_lossage ("operand is neither a constant nor a "
11018 "condition code, invalid operand code "
11022 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11023 if (ASSEMBLER_DIALECT == ASM_ATT)
11026 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11029 /* Like above, but reverse condition */
11031 /* Check to see if argument to %c is really a constant
11032 and not a condition code which needs to be reversed. */
11033 if (!COMPARISON_P (x))
11035 output_operand_lossage ("operand is neither a constant nor a "
11036 "condition code, invalid operand "
11040 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11043 if (!COMPARISON_P (x))
11045 output_operand_lossage ("operand is neither a constant nor a "
11046 "condition code, invalid operand "
11050 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11051 if (ASSEMBLER_DIALECT == ASM_ATT)
11054 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused insn -- mode is irrelevant here.  */
11058 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11062 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address the high 8 bytes of the operand.  */
11066 /* It doesn't actually matter what mode we use here, as we're
11067 only going to use this for printing. */
11068 x = adjust_address_nv (x, DImode, 8);
/* '+': emit a branch-prediction prefix when profitable.  */
11076 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11079 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11082 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%..55%).  */
11084 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11085 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11087 int taken = pred_val > REG_BR_PROB_BASE / 2;
11088 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11090 /* Emit hints only in the case default branch prediction
11091 heuristics would fail. */
11092 if (taken != cputaken)
11094 /* We use 3e (DS) prefix for taken branches and
11095 2e (CS) prefix for not taken branches. */
11097 fputs ("ds ; ", file);
11099 fputs ("cs ; ", file);
/* 'Y': SSE5 com* condition names.  */
11107 switch (GET_CODE (x))
11110 fputs ("neq", file);
11113 fputs ("eq", file);
11117 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11121 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11125 fputs ("le", file);
11129 fputs ("lt", file);
11132 fputs ("unord", file);
11135 fputs ("ord", file);
11138 fputs ("ueq", file);
11141 fputs ("nlt", file);
11144 fputs ("nle", file);
11147 fputs ("ule", file);
11150 fputs ("ult", file);
11153 fputs ("une", file);
/* NOTE(review): this lossage message says 'D' although the switch above
   prints SSE5 com* names (the 'Y' code) -- probably a stale copy of the
   'D' message; verify and correct in the full source.  */
11156 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator after prefixes for old gas.  */
11163 fputs (" ; ", file);
11170 output_operand_lossage ("invalid operand code '%c'", code);
/* Non-template path: dispatch on the operand's rtx class.  */
11175 print_reg (x, code, file);
11177 else if (MEM_P (x))
11179 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11180 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11181 && GET_MODE (x) != BLKmode)
11184 switch (GET_MODE_SIZE (GET_MODE (x)))
11186 case 1: size = "BYTE"; break;
11187 case 2: size = "WORD"; break;
11188 case 4: size = "DWORD"; break;
11189 case 8: size = "QWORD"; break;
11190 case 12: size = "XWORD"; break;
11192 if (GET_MODE (x) == XFmode)
11198 gcc_unreachable ();
11201 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11204 else if (code == 'w')
11206 else if (code == 'k')
11209 fputs (size, file);
11210 fputs (" PTR ", file);
11214 /* Avoid (%rip) for call operands. */
11215 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11216 && !CONST_INT_P (x))
11217 output_addr_const (file, x);
11218 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11219 output_operand_lossage ("invalid constraints for operand");
11221 output_address (x);
/* SFmode constant: emit the 32-bit target image as a hex immediate.  */
11224 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11229 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11230 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11232 if (ASSEMBLER_DIALECT == ASM_ATT)
11234 fprintf (file, "0x%08lx", (long unsigned int) l);
11237 /* These float cases don't actually occur as immediate operands. */
11238 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11242 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11243 fprintf (file, "%s", dstr);
11246 else if (GET_CODE (x) == CONST_DOUBLE
11247 && GET_MODE (x) == XFmode)
11251 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11252 fprintf (file, "%s", dstr);
11257 /* We have patterns that allow zero sets of memory, for instance.
11258 In 64-bit mode, we should probably support all 8-byte vectors,
11259 since we can in fact encode that into an immediate. */
11260 if (GET_CODE (x) == CONST_VECTOR)
11262 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Plain immediates and symbolic constants; '$' / "OFFSET FLAT:" prefix
   depends on dialect (elided lines emit the '$').  */
11268 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11270 if (ASSEMBLER_DIALECT == ASM_ATT)
11273 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11274 || GET_CODE (x) == LABEL_REF)
11276 if (ASSEMBLER_DIALECT == ASM_ATT)
11279 fputs ("OFFSET FLAT:", file);
11282 if (CONST_INT_P (x))
11283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11285 output_pic_addr_const (file, x, code);
11287 output_addr_const (file, x);
11291 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): elided listing -- braces, case labels and several
   statements are missing between the numbered lines below.  */
11294 print_operand_address (FILE *file, rtx addr)
11296 struct ix86_address parts;
11297 rtx base, index, disp;
/* Decompose ADDR into base/index/disp/scale/segment.  */
11299 int ok = ix86_decompose_address (addr, &parts);
11304 index = parts.index;
11306 scale = parts.scale;
/* Segment override (FS/GS) prefix; AT&T adds a '%' first (elided).  */
11314 if (ASSEMBLER_DIALECT == ASM_ATT)
11316 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11319 gcc_unreachable ();
11322 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11323 if (TARGET_64BIT && !base && !index)
/* Peel a CONST (PLUS sym const_int) wrapper to find the symbol.  */
11327 if (GET_CODE (disp) == CONST
11328 && GET_CODE (XEXP (disp, 0)) == PLUS
11329 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11330 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels may use %rip addressing.  */
11332 if (GET_CODE (symbol) == LABEL_REF
11333 || (GET_CODE (symbol) == SYMBOL_REF
11334 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11337 if (!base && !index)
11339 /* Displacement only requires special attention. */
11341 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: for a bare constant address.  */
11343 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11344 fputs ("ds:", file);
11345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11348 output_pic_addr_const (file, disp, 0);
11350 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11354 if (ASSEMBLER_DIALECT == ASM_ATT)
11359 output_pic_addr_const (file, disp, 0);
11360 else if (GET_CODE (disp) == LABEL_REF)
11361 output_asm_label (disp);
11363 output_addr_const (file, disp);
11368 print_reg (base, 0, file);
11372 print_reg (index, 0, file);
11374 fprintf (file, ",%d", scale);
/* Intel syntax: sym[base+index*scale+offset].  */
11380 rtx offset = NULL_RTX;
11384 /* Pull out the offset of a symbol; print any symbol itself. */
11385 if (GET_CODE (disp) == CONST
11386 && GET_CODE (XEXP (disp, 0)) == PLUS
11387 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11389 offset = XEXP (XEXP (disp, 0), 1);
11390 disp = gen_rtx_CONST (VOIDmode,
11391 XEXP (XEXP (disp, 0), 0));
11395 output_pic_addr_const (file, disp, 0);
11396 else if (GET_CODE (disp) == LABEL_REF)
11397 output_asm_label (disp);
11398 else if (CONST_INT_P (disp))
11401 output_addr_const (file, disp);
11407 print_reg (base, 0, file);
/* Sign handling for the bracketed offset ('+' emitted on elided line).  */
11410 if (INTVAL (offset) >= 0)
11412 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11416 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11423 print_reg (index, 0, file);
11425 fprintf (file, "*%d", scale);
/* Print target-specific constant address pieces: UNSPEC wrappers used for
   TLS relocations (and, on Darwin, the machopic function base).  Emits the
   wrapped operand followed by the matching relocation suffix.
   NOTE(review): elided listing -- return statements, break statements and
   the closing brace are not visible here.  */
11433 output_addr_const_extra (FILE *file, rtx x)
/* Anything that is not an UNSPEC is not ours to handle.  */
11437 if (GET_CODE (x) != UNSPEC)
11440 op = XVECEXP (x, 0, 0);
11441 switch (XINT (x, 1))
11443 case UNSPEC_GOTTPOFF:
11444 output_addr_const (file, op);
11445 /* FIXME: This might be @TPOFF in Sun ld. */
11446 fputs ("@GOTTPOFF", file);
11449 output_addr_const (file, op);
11450 fputs ("@TPOFF", file);
/* Local-exec: @TPOFF vs. @NTPOFF chosen on an elided condition
   (presumably TARGET_64BIT -- confirm against full source).  */
11452 case UNSPEC_NTPOFF:
11453 output_addr_const (file, op);
11455 fputs ("@TPOFF", file);
11457 fputs ("@NTPOFF", file);
11459 case UNSPEC_DTPOFF:
11460 output_addr_const (file, op);
11461 fputs ("@DTPOFF", file);
11463 case UNSPEC_GOTNTPOFF:
11464 output_addr_const (file, op);
/* 64-bit uses a %rip-relative @GOTTPOFF; 32-bit uses @GOTNTPOFF.  */
11466 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11467 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11469 fputs ("@GOTNTPOFF", file);
11471 case UNSPEC_INDNTPOFF:
11472 output_addr_const (file, op);
11473 fputs ("@INDNTPOFF", file);
11476 case UNSPEC_MACHOPIC_OFFSET:
11477 output_addr_const (file, op);
11479 machopic_output_function_base_name (file);
11490 /* Split one or more DImode RTL references into pairs of SImode
11491 references. The RTL can be REG, offsettable MEM, integer constant, or
11492 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11493 split and "num" is its length. lo_half and hi_half are output arrays
11494 that parallel "operands". */
/* NOTE(review): elided listing -- the loop over NUM and the if/else around
   the MEM vs. subreg cases are not visible here.  */
11497 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11501 rtx op = operands[num];
11503 /* simplify_subreg refuse to split volatile memory addresses,
11504 but we still have to handle it. */
/* MEM case: address the two 4-byte halves directly.  */
11507 lo_half[num] = adjust_address (op, SImode, 0);
11508 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: take SImode subregs; VOIDmode constants are treated
   as DImode.  */
11512 lo_half[num] = simplify_gen_subreg (SImode, op,
11513 GET_MODE (op) == VOIDmode
11514 ? DImode : GET_MODE (op), 0);
11515 hi_half[num] = simplify_gen_subreg (SImode, op,
11516 GET_MODE (op) == VOIDmode
11517 ? DImode : GET_MODE (op), 4);
11521 /* Split one or more TImode RTL references into pairs of DImode
11522 references. The RTL can be REG, offsettable MEM, integer constant, or
11523 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11524 split and "num" is its length. lo_half and hi_half are output arrays
11525 that parallel "operands". */
/* NOTE(review): elided listing -- the loop over NUM and the MEM/subreg
   if/else are not visible.  TImode analogue of split_di above.  */
11528 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11532 rtx op = operands[num];
11534 /* simplify_subreg refuse to split volatile memory addresses, but we
11535 still have to handle it. */
/* MEM case: the two 8-byte halves at offsets 0 and 8.  */
11538 lo_half[num] = adjust_address (op, DImode, 0);
11539 hi_half[num] = adjust_address (op, DImode, 8);
/* Non-MEM case: DImode subregs of the TImode value.  */
11543 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11544 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11549 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11550 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11551 is the expression of the binary operation. The output may either be
11552 emitted here, or returned to the caller, like all output_* functions.
11554 There is no guarantee that the operands are the same mode, as they
11555 might be within FLOAT or FLOAT_EXTEND expressions. */
11557 #ifndef SYSV386_COMPAT
11558 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11559 wants to fix the assemblers because that causes incompatibility
11560 with gcc. No-one wants to fix gcc because that causes
11561 incompatibility with assemblers... You can use the option of
11562 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11563 #define SYSV386_COMPAT 1
/* NOTE(review): elided listing -- opcode-string assignments (the elided
   lines that set `ssep`/`p` bases per operation), case labels and braces
   are missing between the numbered lines below.  */
11567 output_387_binary_op (rtx insn, rtx *operands)
11569 static char buf[40];
/* SSE path is taken when any operand lives in an SSE register.  */
11572 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11574 #ifdef ENABLE_CHECKING
11575 /* Even if we do not want to check the inputs, this documents input
11576 constraints. Which helps in understanding the following code. */
/* dest must equal one source; the other source is a stack reg or MEM;
   and at least one source is at the top of the 387 stack.  */
11577 if (STACK_REG_P (operands[0])
11578 && ((REG_P (operands[1])
11579 && REGNO (operands[0]) == REGNO (operands[1])
11580 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11581 || (REG_P (operands[2])
11582 && REGNO (operands[0]) == REGNO (operands[2])
11583 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11584 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11587 gcc_assert (is_sse);
/* First switch: pick the mnemonic stem for PLUS/MINUS/MULT/DIV; the
   MODE_INT checks select the fi* (integer-operand) forms.  */
11590 switch (GET_CODE (operands[3]))
11593 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11594 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11602 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11603 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11611 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11612 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11620 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11621 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11629 gcc_unreachable ();
/* SSE: three-operand AVX form (with the 'v' prefix retained in ssep)
   vs. two-operand legacy form (ssep + 1 skips the 'v').  */
11636 strcpy (buf, ssep);
11637 if (GET_MODE (operands[0]) == SFmode)
11638 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11640 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11644 strcpy (buf, ssep + 1);
11645 if (GET_MODE (operands[0]) == SFmode)
11646 strcat (buf, "ss\t{%2, %0|%0, %2}");
11648 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87: choose the operand-order/popping suffix per operation.  */
11654 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11658 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11660 rtx temp = operands[2];
11661 operands[2] = operands[1];
11662 operands[1] = temp;
11665 /* know operands[0] == operands[1]. */
11667 if (MEM_P (operands[2]))
/* Popping form when the other stack operand dies here.  */
11673 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11675 if (STACK_TOP_P (operands[0]))
11676 /* How is it that we are storing to a dead operand[2]?
11677 Well, presumably operands[1] is dead too. We can't
11678 store the result to st(0) as st(0) gets popped on this
11679 instruction. Instead store to operands[2] (which I
11680 think has to be st(1)). st(1) will be popped later.
11681 gcc <= 2.8.1 didn't have this check and generated
11682 assembly code that the Unixware assembler rejected. */
11683 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11685 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11689 if (STACK_TOP_P (operands[0]))
11690 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11692 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory-operand and dying-operand
   sub-cases, with SYSV386_COMPAT controlling fsub{r}/fdiv{r} spelling.  */
11697 if (MEM_P (operands[1]))
11703 if (MEM_P (operands[2]))
11709 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11712 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11713 derived assemblers, confusingly reverse the direction of
11714 the operation for fsub{r} and fdiv{r} when the
11715 destination register is not st(0). The Intel assembler
11716 doesn't have this brain damage. Read !SYSV386_COMPAT to
11717 figure out what the hardware really does. */
11718 if (STACK_TOP_P (operands[0]))
11719 p = "{p\t%0, %2|rp\t%2, %0}";
11721 p = "{rp\t%2, %0|p\t%0, %2}";
11723 if (STACK_TOP_P (operands[0]))
11724 /* As above for fmul/fadd, we can't store to st(0). */
11725 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11727 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11732 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11735 if (STACK_TOP_P (operands[0]))
11736 p = "{rp\t%0, %1|p\t%1, %0}";
11738 p = "{p\t%1, %0|rp\t%0, %1}";
11740 if (STACK_TOP_P (operands[0]))
11741 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11743 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11748 if (STACK_TOP_P (operands[0]))
11750 if (STACK_TOP_P (operands[1]))
11751 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11753 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11756 else if (STACK_TOP_P (operands[1]))
11759 p = "{\t%1, %0|r\t%0, %1}";
11761 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11767 p = "{r\t%2, %0|\t%0, %2}";
11769 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11775 gcc_unreachable ();
11782 /* Return needed mode for entity in optimize_mode_switching pass. */
/* NOTE(review): elided listing -- the CALL_P check and the per-entity
   dispatch around the mode tests are not fully visible.  */
11785 ix86_mode_needed (int entity, rtx insn)
11787 enum attr_i387_cw mode;
11789 /* The mode UNINITIALIZED is used to store control word after a
11790 function call or ASM pattern. The mode ANY specify that function
11791 has no requirements on the control word and make no changes in the
11792 bits we are interested in. */
/* Calls and asm statements may clobber the FP control word.  */
11795 || (NONJUMP_INSN_P (insn)
11796 && (asm_noperands (PATTERN (insn)) >= 0
11797 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11798 return I387_CW_UNINITIALIZED;
11800 if (recog_memoized (insn) < 0)
11801 return I387_CW_ANY;
/* Ask the insn's i387_cw attribute which rounding mode it needs.  */
11803 mode = get_attr_i387_cw (insn);
11808 if (mode == I387_CW_TRUNC)
11813 if (mode == I387_CW_FLOOR)
11818 if (mode == I387_CW_CEIL)
11823 if (mode == I387_CW_MASK_PM)
11828 gcc_unreachable ();
11831 return I387_CW_ANY;
11834 /* Output code to initialize control word copies used by trunc?f?i and
11835 rounding patterns. CURRENT_MODE is set to current control word,
11836 while NEW_MODE is set to new control word. */
/* NOTE(review): elided listing -- the switch statements around the cases
   below and several break statements are not visible.  */
11839 emit_i387_cw_initialization (int mode)
11841 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11844 enum ix86_stack_slot slot;
11846 rtx reg = gen_reg_rtx (HImode);
/* fnstcw the current control word, then edit a copy in REG.  */
11848 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11849 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path (64-bit / partial-reg-stall / size): edit with HImode
   and/or; fast path below uses SImode insert (movsi_insv_1).  */
11851 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11852 || optimize_function_for_size_p (cfun))
11856 case I387_CW_TRUNC:
11857 /* round toward zero (truncate) */
11858 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11859 slot = SLOT_CW_TRUNC;
11862 case I387_CW_FLOOR:
11863 /* round down toward -oo */
11864 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11865 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11866 slot = SLOT_CW_FLOOR;
11870 /* round up toward +oo */
11871 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11872 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11873 slot = SLOT_CW_CEIL;
11876 case I387_CW_MASK_PM:
11877 /* mask precision exception for nearbyint() */
11878 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11879 slot = SLOT_CW_MASK_PM;
11883 gcc_unreachable ();
/* Fast path: overwrite the RC bits (11:10) in one insert.  */
11890 case I387_CW_TRUNC:
11891 /* round toward zero (truncate) */
11892 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)))
;
11893 slot = SLOT_CW_TRUNC;
11896 case I387_CW_FLOOR:
11897 /* round down toward -oo */
11898 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11899 slot = SLOT_CW_FLOOR;
11903 /* round up toward +oo */
11904 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11905 slot = SLOT_CW_CEIL;
11908 case I387_CW_MASK_PM:
11909 /* mask precision exception for nearbyint() -- the PM bit is not in
11910 the insv field, so this case uses iorhi3 in both paths. */
11910 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11911 slot = SLOT_CW_MASK_PM;
11915 gcc_unreachable ();
11919 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Spill the edited word to its dedicated stack slot for fldcw.  */
11921 new_mode = assign_386_stack_local (HImode, slot);
11922 emit_move_insn (new_mode, reg);
11925 /* Output code for INSN to convert a float to a signed int. OPERANDS
11926 are the insn operands. The output may be [HSD]Imode and the input
11927 operand may be [SDX]Fmode. */
/* NOTE(review): elided listing -- the return statements and braces are
   not visible; FISTTP is the SSE3 truncating store.  */
11930 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
/* Whether st(0) dies in this insn (then a popping form suffices).  */
11932 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11933 int dimode_p = GET_MODE (operands[0]) == DImode;
11934 int round_mode = get_attr_i387_cw (insn);
11936 /* Jump through a hoop or two for DImode, since the hardware has no
11937 non-popping instruction. We used to do this a different way, but
11938 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
11939 if ((dimode_p || fisttp) && !stack_top_dies)
11940 output_asm_insn ("fld\t%y1", operands);
11942 gcc_assert (STACK_TOP_P (operands[1]));
11943 gcc_assert (MEM_P (operands[0]));
11944 gcc_assert (GET_MODE (operands[1]) != TFmode);
11947 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: switch the control word to the required rounding mode
   around the store, then restore it.  */
11950 if (round_mode != I387_CW_ANY)
11951 output_asm_insn ("fldcw\t%3", operands);
11952 if (stack_top_dies || dimode_p)
11953 output_asm_insn ("fistp%z0\t%0", operands);
11955 output_asm_insn ("fist%z0\t%0", operands);
11956 if (round_mode != I387_CW_ANY)
11957 output_asm_insn ("fldcw\t%2", operands);
11963 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11964 have the values zero or one, indicates the ffreep insn's operand
11965 from the OPERANDS array. */
11967 static const char *
11968 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11970 if (TARGET_USE_FFREEP)
11971 #if HAVE_AS_IX86_FFREEP
11972 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks the ffreep mnemonic: emit raw opcode bytes via .word.
   The '_' at index 9 of RETVAL is patched below with the stack-register
   digit, producing the 0xdf 0xc0+i encoding.  */
11975 static char retval[] = ".word\t0xc_df";
11976 int regno = REGNO (operands[opno]);
11978 gcc_assert (FP_REGNO_P (regno));
11980 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not profitable: a plain popping store.  */
11985 return opno ? "fstp\t%y1" : "fstp\t%y0";
11989 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11990 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): elided listing -- several branches, braces and the final
   return of alt[mask] content around the visible lines are missing.  */
11993 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11995 int stack_top_dies;
11996 rtx cmp_op0, cmp_op1;
11997 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand positions differ between the eflags and fnstsw forms.  */
12001 cmp_op0 = operands[0];
12002 cmp_op1 = operands[1];
12006 cmp_op0 = operands[1];
12007 cmp_op1 = operands[2];
/* SSE compares: [v]ucomis[sd] / [v]comis[sd]; the +1 offset skips the
   'v' prefix when AVX is unavailable.  */
12012 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12013 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12014 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12015 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12017 if (GET_MODE (operands[0]) == SFmode)
12019 return &ucomiss[TARGET_AVX ? 0 : 1];
12021 return &comiss[TARGET_AVX ? 0 : 1];
12024 return &ucomisd[TARGET_AVX ? 0 : 1];
12026 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first source must be at stack top.  */
12029 gcc_assert (STACK_TOP_P (cmp_op0));
12031 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) if it dies.  */
12033 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12035 if (stack_top_dies)
12037 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12038 return output_387_ffreep (operands, 1);
12041 return "ftst\n\tfnstsw\t%0";
12044 if (STACK_REG_P (cmp_op1)
12046 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12047 && REGNO (cmp_op1) != FIRST_STACK_REG)
12049 /* If both the top of the 387 stack dies, and the other operand
12050 is also a stack register that dies, then this must be a
12051 `fcompp' float compare */
12055 /* There is no double popping fcomi variant. Fortunately,
12056 eflags is immune from the fstp's cc clobbering. */
12058 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12060 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12061 return output_387_ffreep (operands, 0);
12066 return "fucompp\n\tfnstsw\t%0";
12068 return "fcompp\n\tfnstsw\t%0";
12073 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12075 static const char * const alt[16] =
12077 "fcom%z2\t%y2\n\tfnstsw\t%0",
12078 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12079 "fucom%z2\t%y2\n\tfnstsw\t%0",
12080 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12082 "ficom%z2\t%y2\n\tfnstsw\t%0",
12083 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12087 "fcomi\t{%y1, %0|%0, %y1}",
12088 "fcomip\t{%y1, %0|%0, %y1}",
12089 "fucomi\t{%y1, %0|%0, %y1}",
12090 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT from the flag combination above.  */
12101 mask = eflags_p << 3;
12102 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12103 mask |= unordered_p << 1;
12104 mask |= stack_top_dies;
12106 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: an ASM_LONG (or, on
   an elided 64-bit condition, ASM_QUAD) reference to local label VALUE.
   NOTE(review): elided listing -- the condition guarding the QUAD case is
   not visible.  */
12115 ix86_output_addr_vec_elt (FILE *file, int value)
12117 const char *directive = ASM_LONG;
12121 directive = ASM_QUAD;
12123 gcc_assert (!TARGET_64BIT);
12126 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table as a difference of labels:
   label-minus-label, @GOTOFF, Mach-O function-base relative, or a
   GOT-symbol-relative expression depending on target.  */
12130 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12132 const char *directive = ASM_LONG;
12135 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12136 directive = ASM_QUAD;
12138 gcc_assert (!TARGET_64BIT);
12140 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12141 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12142 fprintf (file, "%s%s%d-%s%d\n",
12143 directive, LPREFIX, value, LPREFIX, rel);
12144 else if (HAVE_AS_GOTOFF_IN_DATA)
12145 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12147 else if (TARGET_MACHO)
12149 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12150 machopic_output_function_base_name (file);
12151 fprintf(file, "\n");
/* Fallback: offset from the GOT symbol.  */
12155 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12156 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12159 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Clear DEST by emitting SET (dest, 0); after reload, the preferred
   xor form gets an explicit FLAGS_REG clobber attached (xor clobbers
   the condition codes).  */
12163 ix86_expand_clear (rtx dest)
12167 /* We play register width games, which are only valid after reload. */
12168 gcc_assert (reload_completed);
12170 /* Avoid HImode and its attendant prefix byte. */
12171 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12172 dest = gen_rtx_REG (SImode, REGNO (dest));
12173 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12175 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12176 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12178 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12179 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12185 /* X is an unchanging MEM. If it is a constant pool reference, return
12186 the constant pool rtx, else NULL. */
12189 maybe_get_pool_constant (rtx x)
/* Strip PIC address arithmetic to expose the raw symbol.  */
12191 x = ix86_delegitimize_address (XEXP (x, 0));
12193 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12194 return get_pool_constant (x);
/* Expand a scalar move OP0 := OP1 in MODE, legitimizing TLS symbols,
   dllimport references and PIC addresses, and forcing operands into
   registers where x86 addressing/immediate rules require it.
   NOTE(review): elided listing -- several returns, braces and conditions
   between the numbered lines are not visible.  */
12200 ix86_expand_move (enum machine_mode mode, rtx operands[])
12203 enum tls_model model;
/* TLS symbol: rewrite through the model-specific access sequence.  */
12208 if (GET_CODE (op1) == SYMBOL_REF)
12210 model = SYMBOL_REF_TLS_MODEL (op1);
12213 op1 = legitimize_tls_address (op1, model, true);
12214 op1 = force_operand (op1, op0);
12218 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12219 && SYMBOL_REF_DLLIMPORT_P (op1))
12220 op1 = legitimize_dllimport_symbol (op1, false);
/* (CONST (PLUS sym addend)): legitimize the symbol, then re-add.  */
12222 else if (GET_CODE (op1) == CONST
12223 && GET_CODE (XEXP (op1, 0)) == PLUS
12224 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12226 rtx addend = XEXP (XEXP (op1, 0), 1);
12227 rtx symbol = XEXP (XEXP (op1, 0), 0);
12230 model = SYMBOL_REF_TLS_MODEL (symbol);
12232 tmp = legitimize_tls_address (symbol, model, true);
12233 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12234 && SYMBOL_REF_DLLIMPORT_P (symbol))
12235 tmp = legitimize_dllimport_symbol (symbol, true);
12239 tmp = force_operand (tmp, NULL);
12240 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12241 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands must go through the PIC register.  */
12247 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12249 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin 32-bit: indirect through a scratch (reusing OP0 when it is
   already a register and reload allows).  */
12254 rtx temp = ((reload_in_progress
12255 || ((op0 && REG_P (op0))
12257 ? op0 : gen_reg_rtx (Pmode));
12258 op1 = machopic_indirect_data_reference (op1, temp);
12259 op1 = machopic_legitimize_pic_address (op1, mode,
12260 temp == op1 ? 0 : temp);
12262 else if (MACHOPIC_INDIRECT)
12263 op1 = machopic_indirect_data_reference (op1, 0);
12271 op1 = force_reg (Pmode, op1);
12272 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12274 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12275 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: mem-to-mem moves need an intermediate register (except
   pushes, which the hardware can do from memory).  */
12284 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12285 || !push_operand (op0, mode))
12287 op1 = force_reg (mode, op1);
12289 if (push_operand (op0, mode)
12290 && ! general_no_elim_operand (op1, mode))
12291 op1 = copy_to_mode_reg (mode, op1);
12293 /* Force large constants in 64bit compilation into register
12294 to get them CSEed. */
12295 if (can_create_pseudo_p ()
12296 && (mode == DImode) && TARGET_64BIT
12297 && immediate_operand (op1, mode)
12298 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12299 && !register_operand (op0, mode)
12301 op1 = copy_to_mode_reg (mode, op1);
12303 if (can_create_pseudo_p ()
12304 && FLOAT_MODE_P (mode)
12305 && GET_CODE (op1) == CONST_DOUBLE)
12307 /* If we are loading a floating point constant to a register,
12308 force the value to memory now, since we'll get better code
12309 out the back end. */
12311 op1 = validize_mem (force_const_mem (mode, op1));
12312 if (!register_operand (op0, mode))
12314 rtx temp = gen_reg_rtx (mode);
12315 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12316 emit_move_insn (op0, temp);
/* Finally emit the (possibly legitimized) move.  */
12322 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move OP0 := OP1, forcing constants to the
   constant pool and routing under-aligned SSE memory operands through
   ix86_expand_vector_move_misalign.  */
12326 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12328 rtx op0 = operands[0], op1 = operands[1];
12329 unsigned int align = GET_MODE_ALIGNMENT (mode);
12331 /* Force constants other than zero into memory. We do not know how
12332 the instructions used to build constants modify the upper 64 bits
12333 of the register, once we have that information we may be able
12334 to handle some of them more efficiently. */
/* standard_sse_constant_p > 0 means the constant has a cheap inline
   form (all-zeros/all-ones) and can stay as an immediate.  */
12335 if (can_create_pseudo_p ()
12336 && register_operand (op0, mode)
12337 && (CONSTANT_P (op1)
12338 || (GET_CODE (op1) == SUBREG
12339 && CONSTANT_P (SUBREG_REG (op1))))
12340 && standard_sse_constant_p (op1) <= 0)
12341 op1 = validize_mem (force_const_mem (mode, op1));
12343 /* We need to check memory alignment for SSE mode since attribute
12344 can make operands unaligned. */
12345 if (can_create_pseudo_p ()
12346 && SSE_REG_MODE_P (mode)
12347 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12348 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12352 /* ix86_expand_vector_move_misalign() does not like constants ... */
12353 if (CONSTANT_P (op1)
12354 || (GET_CODE (op1) == SUBREG
12355 && CONSTANT_P (SUBREG_REG (op1))))
12356 op1 = validize_mem (force_const_mem (mode, op1));
12358 /* ... nor both arguments in memory. */
12359 if (!register_operand (op0, mode)
12360 && !register_operand (op1, mode))
12361 op1 = force_reg (mode, op1);
12363 tmp[0] = op0; tmp[1] = op1;
12364 ix86_expand_vector_move_misalign (mode, tmp);
12368 /* Make operand1 a register if it isn't already. */
/* Aligned mem-to-mem case: stage through a register.  */
12369 if (can_create_pseudo_p ()
12370 && !register_operand (op0, mode)
12371 && !register_operand (op1, mode))
12373 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12377 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12380 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12381 straight to ix86_expand_vector_move. */
12382 /* Code generation for scalar reg-reg moves of single and double precision data:
12383 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12387 if (x86_sse_partial_reg_dependency == true)
12392 Code generation for scalar loads of double precision data:
12393 if (x86_sse_split_regs == true)
12394 movlpd mem, reg (gas syntax)
12398 Code generation for unaligned packed loads of single precision data
12399 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12400 if (x86_sse_unaligned_move_optimal)
12403 if (x86_sse_partial_reg_dependency == true)
12415 Code generation for unaligned packed loads of double precision data
12416 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12417 if (x86_sse_unaligned_move_optimal)
12420 if (x86_sse_split_regs == true)
/* Emit an unaligned vector move of OPERANDS[1] into OPERANDS[0] in
   MODE.  Selects between AVX unaligned moves, SSE movups/movdqu, and
   half-register load/store sequences depending on the target tuning
   flags.  NOTE(review): this listing is elided; surrounding control
   flow (braces, TARGET_AVX dispatch, MEM_P tests) is partly missing.  */
12433 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (presumably guarded by a TARGET_AVX test not shown here):
   use vmovdqu / vmovups / vmovupd, which handle misalignment natively.  */
12442 switch (GET_MODE_CLASS (mode))
12444 case MODE_VECTOR_INT:
12446 switch (GET_MODE_SIZE (mode))
12449 op0 = gen_lowpart (V16QImode, op0);
12450 op1 = gen_lowpart (V16QImode, op1);
12451 emit_insn (gen_avx_movdqu (op0, op1));
12454 op0 = gen_lowpart (V32QImode, op0);
12455 op1 = gen_lowpart (V32QImode, op1);
12456 emit_insn (gen_avx_movdqu256 (op0, op1));
12459 gcc_unreachable ();
12462 case MODE_VECTOR_FLOAT:
12463 op0 = gen_lowpart (mode, op0);
12464 op1 = gen_lowpart (mode, op1);
12469 emit_insn (gen_avx_movups (op0, op1));
12472 emit_insn (gen_avx_movups256 (op0, op1));
12475 emit_insn (gen_avx_movupd (op0, op1));
12478 emit_insn (gen_avx_movupd256 (op0, op1));
12481 gcc_unreachable ();
12486 gcc_unreachable ();
/* SSE load path: op1 is the misaligned memory operand.  */
12494 /* If we're optimizing for size, movups is the smallest. */
12495 if (optimize_insn_for_size_p ())
12497 op0 = gen_lowpart (V4SFmode, op0);
12498 op1 = gen_lowpart (V4SFmode, op1);
12499 emit_insn (gen_sse_movups (op0, op1));
12503 /* ??? If we have typed data, then it would appear that using
12504 movdqu is the only way to get unaligned data loaded with
12506 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12508 op0 = gen_lowpart (V16QImode, op0);
12509 op1 = gen_lowpart (V16QImode, op1);
12510 emit_insn (gen_sse2_movdqu (op0, op1));
12514 if (TARGET_SSE2 && mode == V2DFmode)
12518 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12520 op0 = gen_lowpart (V2DFmode, op0);
12521 op1 = gen_lowpart (V2DFmode, op1);
12522 emit_insn (gen_sse2_movupd (op0, op1));
12526 /* When SSE registers are split into halves, we can avoid
12527 writing to the top half twice. */
12528 if (TARGET_SSE_SPLIT_REGS)
12530 emit_clobber (op0);
12535 /* ??? Not sure about the best option for the Intel chips.
12536 The following would seem to satisfy; the register is
12537 entirely cleared, breaking the dependency chain. We
12538 then store to the upper half, with a dependency depth
12539 of one. A rumor has it that Intel recommends two movsd
12540 followed by an unpacklpd, but this is unconfirmed. And
12541 given that the dependency depth of the unpacklpd would
12542 still be one, I'm not sure why this would be better. */
12543 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately: low half via loadlpd (with the
   upper half zeroed), high half via loadhpd.  */
12546 m = adjust_address (op1, DFmode, 0);
12547 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12548 m = adjust_address (op1, DFmode, 8);
12549 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12553 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12555 op0 = gen_lowpart (V4SFmode, op0);
12556 op1 = gen_lowpart (V4SFmode, op1);
12557 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the destination's old contents,
   either by zeroing it or by emitting an explicit clobber.  */
12561 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12562 emit_move_insn (op0, CONST0_RTX (mode));
12564 emit_clobber (op0);
12566 if (mode != V4SFmode)
12567 op0 = gen_lowpart (V4SFmode, op0);
12568 m = adjust_address (op1, V2SFmode, 0);
12569 emit_insn (gen_sse_loadlps (op0, op0, m));
12570 m = adjust_address (op1, V2SFmode, 8);
12571 emit_insn (gen_sse_loadhps (op0, op0, m));
/* SSE store path: op0 is the misaligned memory operand.  */
12574 else if (MEM_P (op0))
12576 /* If we're optimizing for size, movups is the smallest. */
12577 if (optimize_insn_for_size_p ())
12579 op0 = gen_lowpart (V4SFmode, op0);
12580 op1 = gen_lowpart (V4SFmode, op1);
12581 emit_insn (gen_sse_movups (op0, op1));
12585 /* ??? Similar to above, only less clear because of quote
12586 typeless stores unquote. */
12587 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12588 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12590 op0 = gen_lowpart (V16QImode, op0);
12591 op1 = gen_lowpart (V16QImode, op1);
12592 emit_insn (gen_sse2_movdqu (op0, op1));
/* Store the two halves separately (storelpd/storehpd for V2DF,
   storelps/storehps otherwise).  */
12596 if (TARGET_SSE2 && mode == V2DFmode)
12598 m = adjust_address (op0, DFmode, 0);
12599 emit_insn (gen_sse2_storelpd (m, op1));
12600 m = adjust_address (op0, DFmode, 8);
12601 emit_insn (gen_sse2_storehpd (m, op1));
12605 if (mode != V4SFmode)
12606 op1 = gen_lowpart (V4SFmode, op1);
12607 m = adjust_address (op0, V2SFmode, 0);
12608 emit_insn (gen_sse_storelps (m, op1));
12609 m = adjust_address (op0, V2SFmode, 8);
12610 emit_insn (gen_sse_storehps (m, op1));
12614 gcc_unreachable ();
12617 /* Expand a push in MODE. This is some mode for which we do not support
12618 proper push instructions, at least from the registers that we expect
12619 the value to live in. */
/* Push X of MODE onto the stack by explicitly decrementing the stack
   pointer and storing through it; used for modes with no native push
   instruction from the expected register class.  */
12622 ix86_expand_push (enum machine_mode mode, rtx x)
/* sp -= GET_MODE_SIZE (mode); expand_simple_binop may or may not
   target the stack pointer directly, hence the copy below.  */
12626 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12627 GEN_INT (-GET_MODE_SIZE (mode)),
12628 stack_pointer_rtx, 1, OPTAB_DIRECT);
12629 if (tmp != stack_pointer_rtx)
12630 emit_move_insn (stack_pointer_rtx, tmp);
12632 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12634 /* When we push an operand onto stack, it has to be aligned at least
12635 at the function argument boundary. */
12636 set_mem_align (tmp,
12637 ix86_function_arg_boundary (mode, NULL_TREE))
12639 emit_move_insn (tmp, x);
12642 /* Helper function of ix86_fixup_binary_operands to canonicalize
12643 operand order. Returns true if the operands should be swapped. */
/* Decide whether src1/src2 of a two-address binary operation should be
   swapped to get a better operand order; only meaningful for
   commutative CODEs.  NOTE(review): the return statements for each
   priority rule are elided from this listing.  */
12646 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12649 rtx dst = operands[0];
12650 rtx src1 = operands[1];
12651 rtx src2 = operands[2];
12653 /* If the operation is not commutative, we can't do anything. */
12654 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12657 /* Highest priority is that src1 should match dst. */
12658 if (rtx_equal_p (dst, src1))
12660 if (rtx_equal_p (dst, src2))
12663 /* Next highest priority is that immediate constants come second. */
12664 if (immediate_operand (src2, mode))
12666 if (immediate_operand (src1, mode))
12669 /* Lowest priority is that memory references should come second. */
12679 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12680 destination to use for the operation. If different from the true
12681 destination in operands[0], a copy operation will be required. */
/* Massage OPERANDS of a binary CODE in MODE so they satisfy the x86
   two-address constraints, and return the destination the caller
   should emit into (a fresh pseudo if it differs from operands[0],
   in which case a copy back is required).  */
12684 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12687 rtx dst = operands[0];
12688 rtx src1 = operands[1];
12689 rtx src2 = operands[2];
12691 /* Canonicalize operand order. */
12692 if (ix86_swap_binary_operands_p (code, mode, operands))
12696 /* It is invalid to swap operands of different modes. */
12697 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12704 /* Both source operands cannot be in memory. */
12705 if (MEM_P (src1) && MEM_P (src2))
12707 /* Optimization: Only read from memory once. */
12708 if (rtx_equal_p (src1, src2))
/* Load once into a register and use it for both sources.  */
12710 src2 = force_reg (mode, src2);
12714 src2 = force_reg (mode, src2);
12717 /* If the destination is memory, and we do not have matching source
12718 operands, do things in registers. */
12719 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12720 dst = gen_reg_rtx (mode);
12722 /* Source 1 cannot be a constant. */
12723 if (CONSTANT_P (src1))
12724 src1 = force_reg (mode, src1);
12726 /* Source 1 cannot be a non-matching memory. */
12727 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12728 src1 = force_reg (mode, src1);
/* Write the possibly-updated sources back for the caller.  */
12730 operands[1] = src1;
12731 operands[2] = src2;
12735 /* Similarly, but assume that the destination has already been
12736 set up properly. */
/* Variant of ix86_fixup_binary_operands for when the destination is
   already known to be usable in place: asserts that no copy-back
   destination was substituted.  */
12739 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12740 enum machine_mode mode, rtx operands[])
12742 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12743 gcc_assert (dst == operands[0]);
12746 /* Attempt to expand a binary operator. Make the expansion closer to the
12747 actual machine, then just general_operand, which will allow 3 separate
12748 memory references (one output, two input) in a single insn. */
/* Expand a binary operator CODE in MODE on OPERANDS, fixing the
   operands up to match machine constraints and emitting the insn with
   an explicit FLAGS_REG clobber (except during reload).  */
12751 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12754 rtx src1, src2, dst, op, clob;
12756 dst = ix86_fixup_binary_operands (code, mode, operands);
12757 src1 = operands[1];
12758 src2 = operands[2];
12760 /* Emit the instruction. */
12762 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12763 if (reload_in_progress)
12765 /* Reload doesn't know about the flags register, and doesn't know that
12766 it doesn't want to clobber it. We can only do this with PLUS. */
12767 gcc_assert (code == PLUS);
/* Normal case: wrap the SET in a PARALLEL with a flags clobber, which
   is what the i386 arithmetic insn patterns expect.  */
12772 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12773 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12776 /* Fix up the destination if needed. */
12777 if (dst != operands[0])
12778 emit_move_insn (operands[0], dst);
12781 /* Return TRUE or FALSE depending on whether the binary operator meets the
12782 appropriate constraints. */
/* Predicate: do OPERANDS already satisfy the two-address constraints
   for binary CODE in MODE?  Mirrors the checks performed by
   ix86_fixup_binary_operands without modifying anything.
   NOTE(review): the individual return statements are elided from this
   listing.  */
12785 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12788 rtx dst = operands[0];
12789 rtx src1 = operands[1];
12790 rtx src2 = operands[2];
12792 /* Both source operands cannot be in memory. */
12793 if (MEM_P (src1) && MEM_P (src2))
12796 /* Canonicalize operand order for commutative operators. */
12797 if (ix86_swap_binary_operands_p (code, mode, operands))
12804 /* If the destination is memory, we must have a matching source operand. */
12805 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12808 /* Source 1 cannot be a constant. */
12809 if (CONSTANT_P (src1))
12812 /* Source 1 cannot be a non-matching memory. */
12813 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12819 /* Attempt to expand a unary operator. Make the expansion closer to the
12820 actual machine, then just general_operand, which will allow 2 separate
12821 memory references (one output, one input) in a single insn. */
/* Expand a unary operator CODE in MODE on OPERANDS.  Forces a register
   destination when memory operands don't match, and adds a FLAGS_REG
   clobber except for NOT (which does not modify flags) or during
   reload.  */
12824 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12827 int matching_memory;
12828 rtx src, dst, op, clob;
12833 /* If the destination is memory, and we do not have matching source
12834 operands, do things in registers. */
12835 matching_memory = 0;
12838 if (rtx_equal_p (dst, src))
12839 matching_memory = 1;
12841 dst = gen_reg_rtx (mode);
12844 /* When source operand is memory, destination must match. */
12845 if (MEM_P (src) && !matching_memory)
12846 src = force_reg (mode, src);
12848 /* Emit the instruction. */
12850 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT never clobbers flags; during reload we also must not add the
   clobber because reload cannot cope with it.  */
12851 if (reload_in_progress || code == NOT)
12853 /* Reload doesn't know about the flags register, and doesn't know that
12854 it doesn't want to clobber it. */
12855 gcc_assert (code == NOT);
12860 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12861 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12864 /* Fix up the destination if needed. */
12865 if (dst != operands[0])
12866 emit_move_insn (operands[0], dst);
12869 /* Return TRUE or FALSE depending on whether the unary operator meets the
12870 appropriate constraints. */
/* Predicate: do OPERANDS satisfy the constraints of a unary operator?
   Only requirement checked: when either operand is memory, source and
   destination must be the same location.  */
12873 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12874 enum machine_mode mode ATTRIBUTE_UNUSED,
12875 rtx operands[2] ATTRIBUTE_UNUSED)
12877 /* If one of operands is memory, source and destination must match. */
12878 if ((MEM_P (operands[0])
12879 || MEM_P (operands[1]))
12880 && ! rtx_equal_p (operands[0], operands[1]))
12885 /* Post-reload splitter for converting an SF or DFmode value in an
12886 SSE register into an unsigned SImode. */
/* Post-reload splitter body: convert an SF/DF value held in an SSE
   register to unsigned SImode.  operands[1..4] supply scratch vectors
   (LARGE mask, ZERO_OR_TWO31 selector, INPUT, the 2**31 constant);
   operands[0] aliases the result register viewed in the vector mode.  */
12889 ix86_split_convert_uns_si_sse (rtx operands[])
12891 enum machine_mode vecmode;
12892 rtx value, large, zero_or_two31, input, two31, x;
12894 large = operands[1];
12895 zero_or_two31 = operands[2];
12896 input = operands[3];
12897 two31 = operands[4];
12898 vecmode = GET_MODE (large);
12899 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12901 /* Load up the value into the low element. We must ensure that the other
12902 elements are valid floats -- zero is the easiest such value. */
12905 if (vecmode == V4SFmode)
12906 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12908 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register-input case: zero VALUE then move the scalar in via
   movss/movsd.  */
12912 input = gen_rtx_REG (vecmode, REGNO (input));
12913 emit_move_insn (value, CONST0_RTX (vecmode));
12914 if (vecmode == V4SFmode)
12915 emit_insn (gen_sse_movss (value, value, input));
12917 emit_insn (gen_sse2_movsd (value, value, input));
12920 emit_move_insn (large, two31);
12921 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE := (2**31 <= VALUE) as an all-ones/all-zeros vector mask.  */
12923 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12924 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ZERO_OR_TWO31 := mask ? 2**31 : 0; subtract it so VALUE fits in a
   signed conversion.  */
12926 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12927 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12929 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12930 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into 0 or 0x80000000 by shifting into the sign bit.  */
12932 large = gen_rtx_REG (V4SImode, REGNO (large));
12933 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12935 x = gen_rtx_REG (V4SImode, REGNO (value));
12936 if (vecmode == V4SFmode)
12937 emit_insn (gen_sse2_cvttps2dq (x, value));
12939 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for inputs that were >= 2**31.  */
12942 emit_insn (gen_xorv4si3 (value, value, large));
12945 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12946 Expects the 64-bit DImode to be supplied in a pair of integral
12947 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12948 -mfpmath=sse, !optimize_size only. */
/* Convert unsigned DImode INPUT (a pair of 32-bit halves) to DFmode
   TARGET using only SSE2, via the classic exponent-splicing trick:
   pair each 32-bit half with a biased-exponent word, subtract the
   biases 0x1.0p52 / 0x1.0p84, then add the two doubles.  */
12951 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12953 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12954 rtx int_xmm, fp_xmm;
12955 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an XMM register, by the
   cheapest route the target supports.  */
12958 int_xmm = gen_reg_rtx (V4SImode);
12959 if (TARGET_INTER_UNIT_MOVES)
12960 emit_insn (gen_movdi_to_sse (int_xmm, input));
12961 else if (TARGET_SSE_SPLIT_REGS)
12963 emit_clobber (int_xmm);
12964 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12968 x = gen_reg_rtx (V2DImode);
12969 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12970 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for 2^52 (0x433...) and 2^84 (0x453...).  */
12973 x = gen_rtx_CONST_VECTOR (V4SImode,
12974 gen_rtvec (4, GEN_INT (0x43300000UL),
12975 GEN_INT (0x45300000UL),
12976 const0_rtx, const0_rtx));
12977 exponents = validize_mem (force_const_mem (V4SImode, x));
12979 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12980 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12982 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12983 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12984 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12985 (0x1.0p84 + double(fp_value_hi_xmm)).
12986 Note these exponents differ by 32. */
12988 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12990 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12991 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12992 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12993 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12994 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12995 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12996 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12997 biases = validize_mem (force_const_mem (V2DFmode, biases));
12998 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13000 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack the high element
   and add.  */
13002 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13005 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13006 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13007 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13010 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13013 /* Not used, but eases macroization of patterns. */
/* Stub that must never be called; exists only so macro-generated
   patterns have a uniform expander name for the XFmode case.  */
13015 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13016 rtx input ATTRIBUTE_UNUSED)
13018 gcc_unreachable ();
13021 /* Convert an unsigned SImode value into a DFmode. Only currently used
13022 for SSE, but applicable anywhere. */
/* Convert unsigned SImode INPUT to DFmode TARGET: bias the input into
   signed range by subtracting 2**31 (via adding INT_MIN), do a signed
   int->double conversion, then add 2**31 back as a double.  Exact,
   since DFmode can represent every 32-bit integer.  */
13025 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13027 REAL_VALUE_TYPE TWO31r;
/* input + INT_MIN == input - 2**31 in wraparound arithmetic.  */
13030 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13031 NULL, 1, OPTAB_DIRECT);
13033 fp = gen_reg_rtx (DFmode);
13034 emit_insn (gen_floatsidf2 (fp, x));
13036 real_ldexp (&TWO31r, &dconst1, 31);
13037 x = const_double_from_real_value (TWO31r, DFmode);
13039 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13041 emit_move_insn (target, x);
13044 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13045 32-bit mode; otherwise we have a direct convert instruction. */
/* Convert signed DImode INPUT to DFmode TARGET for 32-bit SSE math:
   convert the signed high word, scale by 2**32, convert the unsigned
   low word, and sum.  */
13048 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13050 REAL_VALUE_TYPE TWO32r;
13051 rtx fp_lo, fp_hi, x;
13053 fp_lo = gen_reg_rtx (DFmode);
13054 fp_hi = gen_reg_rtx (DFmode);
13056 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13058 real_ldexp (&TWO32r, &dconst1, 32);
13059 x = const_double_from_real_value (TWO32r, DFmode);
13060 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low 32 bits are unsigned regardless of the sign of INPUT.  */
13062 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13064 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13067 emit_move_insn (target, x);
13070 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13071 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Convert unsigned SImode INPUT to SFmode TARGET using only SSE:
   split into 16-bit halves (each exactly representable in SF),
   convert both signed, and recombine as hi * 2**16 + lo.  */
13073 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13075 REAL_VALUE_TYPE ONE16r;
13076 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13078 real_ldexp (&ONE16r, &dconst1, 16);
13079 x = const_double_from_real_value (ONE16r, SFmode);
13080 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13081 NULL, 0, OPTAB_DIRECT);
13082 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13083 NULL, 0, OPTAB_DIRECT);
13084 fp_hi = gen_reg_rtx (SFmode);
13085 fp_lo = gen_reg_rtx (SFmode);
13086 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13087 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13088 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13090 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may have produced the sum elsewhere; copy if so.  */
13092 if (!rtx_equal_p (target, fp_hi))
13093 emit_move_insn (target, fp_hi);
13096 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
13097 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in MODE whose elements are VALUE.  If VECT is
   true, VALUE is replicated into every element; otherwise only
   element 0 is VALUE and the rest are zero (FP modes).
   NOTE(review): the mode dispatch (switch/case lines) is partly
   elided from this listing.  */
13101 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13108 v = gen_rtvec (4, value, value, value, value);
13109 return gen_rtx_CONST_VECTOR (V4SImode, v);
13113 v = gen_rtvec (2, value, value);
13114 return gen_rtx_CONST_VECTOR (V2DImode, v);
13118 v = gen_rtvec (4, value, value, value, value);
13120 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13121 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13122 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13126 v = gen_rtvec (2, value, value);
13128 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13129 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13132 gcc_unreachable ();
13136 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13137 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13138 for an SSE register. If VECT is true, then replicate the mask for
13139 all elements of the vector register. If INVERT is true, then create
13140 a mask excluding the sign bit. */
/* Build (in a register) the sign-bit mask for MODE, suitable for SSE
   bitwise abs/neg/copysign.  VECT replicates the mask to all vector
   elements; INVERT produces the complement (all bits except the sign
   bit).  NOTE(review): the mode switch and several case labels are
   elided from this listing; comments below are per visible fragment.  */
13143 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13145 enum machine_mode vec_mode, imode;
13146 HOST_WIDE_INT hi, lo;
13151 /* Find the sign bit, sign extended to 2*HWI. */
/* SF/SI case: sign bit is bit 31.  */
13157 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13158 lo = 0x80000000, hi = lo < 0;
/* DF/DI case: sign bit is bit 63; splitting between LO and HI depends
   on whether HOST_WIDE_INT is 64-bit.  */
13164 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13165 if (HOST_BITS_PER_WIDE_INT >= 64)
13166 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13168 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/extended case: no vector mode; the mask is used as a scalar.  */
13173 vec_mode = VOIDmode;
13174 if (HOST_BITS_PER_WIDE_INT >= 64)
13177 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13184 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13188 lo = ~lo, hi = ~hi;
13194 mask = immed_double_const (lo, hi, imode);
13196 vec = gen_rtvec (2, v, mask);
13197 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13198 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13205 gcc_unreachable ();
/* INVERT: complement the mask bits.  */
13209 lo = ~lo, hi = ~hi;
13211 /* Force this value into the low part of a fp vector constant. */
13212 mask = immed_double_const (lo, hi, imode);
13213 mask = gen_lowpart (mode, mask);
13215 if (vec_mode == VOIDmode)
13216 return force_reg (mode, mask);
13218 v = ix86_build_const_vector (mode, vect, mask);
13219 return force_reg (vec_mode, v);
13222 /* Generate code for floating point ABS or NEG. */
/* Expand floating-point ABS or NEG (CODE) in MODE.  With SSE math the
   operation is a bitwise XOR (NEG) or AND (ABS) with a sign-bit mask;
   otherwise a plain x87-style rtx is emitted, still carrying the mask
   as a USE plus a flags clobber for post-reload splitting.  */
13225 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13228 rtx mask, set, use, clob, dst, src;
13229 bool use_sse = false;
13230 bool vector_mode = VECTOR_MODE_P (mode);
13231 enum machine_mode elt_mode = mode;
/* For vector modes the mask is built per-element.  */
13235 elt_mode = GET_MODE_INNER (mode);
13238 else if (mode == TFmode)
13240 else if (TARGET_SSE_MATH)
13241 use_sse = SSE_FLOAT_MODE_P (mode);
13243 /* NEG and ABS performed with SSE use bitwise mask operations.
13244 Create the appropriate mask now. */
/* ABS needs the inverted mask (clear the sign bit), NEG the plain
   sign-bit mask (flip it).  */
13246 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13255 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13256 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit CODE directly with USE/CLOBBER attached.  */
13261 set = gen_rtx_fmt_e (code, mode, src);
13262 set = gen_rtx_SET (VOIDmode, dst, set);
13265 use = gen_rtx_USE (VOIDmode, mask);
13266 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13267 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13268 gen_rtvec (3, set, use, clob)));
13275 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand copysign (operands[0] = copysign (operands[1], operands[2])).
   A constant magnitude operand gets the cheaper *_const pattern with a
   single mask; a variable one needs both the mask and its complement
   (the *_var pattern).  */
13278 ix86_expand_copysign (rtx operands[])
13280 enum machine_mode mode;
13281 rtx dest, op0, op1, mask, nmask;
13283 dest = operands[0];
13287 mode = GET_MODE (dest);
13289 if (GET_CODE (op0) == CONST_DOUBLE)
13291 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* The sign of a constant magnitude is irrelevant; canonicalize to
   its absolute value.  */
13293 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13294 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13296 if (mode == SFmode || mode == DFmode)
13298 enum machine_mode vmode;
13300 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13302 if (op0 == CONST0_RTX (mode))
13303 op0 = CONST0_RTX (vmode);
/* Embed the scalar constant in element 0 of a vector constant, the
   remaining elements zero.  */
13308 if (mode == SFmode)
13309 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13310 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13312 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13314 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13317 else if (op0 != CONST0_RTX (mode))
13318 op0 = force_reg (mode, op0);
13320 mask = ix86_build_signbit_mask (mode, 0, 0);
13322 if (mode == SFmode)
13323 copysign_insn = gen_copysignsf3_const;
13324 else if (mode == DFmode)
13325 copysign_insn = gen_copysigndf3_const;
13327 copysign_insn = gen_copysigntf3_const;
13329 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude case: needs both sign and non-sign masks and a
   scratch operand (NULL_RTX here, allocated by the pattern).  */
13333 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13335 nmask = ix86_build_signbit_mask (mode, 0, 1);
13336 mask = ix86_build_signbit_mask (mode, 0, 0);
13338 if (mode == SFmode)
13339 copysign_insn = gen_copysignsf3_var;
13340 else if (mode == DFmode)
13341 copysign_insn = gen_copysigndf3_var;
13343 copysign_insn = gen_copysigntf3_var;
13345 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13349 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13350 be a constant, and so has already been expanded into a vector constant. */
/* Split a copysign whose magnitude operand is a (vectorized) constant:
   dest = (op1 & sign-mask) | op0.  The IOR is skipped when the
   constant magnitude is zero.  */
13353 ix86_split_copysign_const (rtx operands[])
13355 enum machine_mode mode, vmode;
13356 rtx dest, op0, op1, mask, x;
13358 dest = operands[0];
13361 mask = operands[3];
13363 mode = GET_MODE (dest);
13364 vmode = GET_MODE (mask);
/* View the scalar destination in the mask's vector mode so the
   bitwise ops are legal.  */
13366 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13367 x = gen_rtx_AND (vmode, dest, mask);
13368 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13370 if (op0 != CONST0_RTX (vmode))
13372 x = gen_rtx_IOR (vmode, dest, op0);
13373 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13377 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13378 so we have to do two masks. */
/* Split a copysign with variable magnitude: dest = (op0 & ~sign-mask)
   | (op1 & sign-mask).  Register-allocation alternatives determine
   which of dest/scratch alias the masks and operands, so each branch
   below asserts the constraint pairing it relies on.  */
13381 ix86_split_copysign_var (rtx operands[])
13383 enum machine_mode mode, vmode;
13384 rtx dest, scratch, op0, op1, mask, nmask, x;
13386 dest = operands[0];
13387 scratch = operands[1];
13390 nmask = operands[4];
13391 mask = operands[5];
13393 mode = GET_MODE (dest);
13394 vmode = GET_MODE (mask);
13396 if (rtx_equal_p (op0, op1))
13398 /* Shouldn't happen often (it's useless, obviously), but when it does
13399 we'd generate incorrect code if we continue below. */
13400 emit_move_insn (dest, op0);
13404 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13406 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask; dest = ~dest & op0 (ANDN-style, using dest
   as the mask register).  */
13408 x = gen_rtx_AND (vmode, scratch, mask);
13409 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13412 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13413 x = gen_rtx_NOT (vmode, dest);
13414 x = gen_rtx_AND (vmode, x, op0);
13415 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13419 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13421 x = gen_rtx_AND (vmode, scratch, mask);
13423 else /* alternative 2,4 */
13425 gcc_assert (REGNO (mask) == REGNO (scratch));
13426 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13427 x = gen_rtx_AND (vmode, scratch, op1);
13429 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13431 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13433 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13434 x = gen_rtx_AND (vmode, dest, nmask);
13436 else /* alternative 3,4 */
13438 gcc_assert (REGNO (nmask) == REGNO (dest));
13440 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13441 x = gen_rtx_AND (vmode, dest, op0);
13443 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the two masked halves.  */
13446 x = gen_rtx_IOR (vmode, dest, scratch);
13447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13450 /* Return TRUE or FALSE depending on whether the first SET in INSN
13451 has source and destination with matching CC modes, and that the
13452 CC mode is at least as constrained as REQ_MODE. */
/* Check that the first SET in INSN is a COMPARE whose destination CC
   mode is at least as constrained as REQ_MODE; returns whether the
   modes are compatible.  Used by insn predicates to validate flag
   usage.  NOTE(review): the per-mode acceptance lists inside the
   switch are elided from this listing.  */
13455 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13458 enum machine_mode set_mode;
13460 set = PATTERN (insn);
13461 if (GET_CODE (set) == PARALLEL)
13462 set = XVECEXP (set, 0, 0);
13463 gcc_assert (GET_CODE (set) == SET);
13464 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13466 set_mode = GET_MODE (SET_DEST (set));
/* CCmode with a nonzero second compare operand is the fully general
   case; anything weaker must have been requested explicitly.  */
13470 if (req_mode != CCNOmode
13471 && (req_mode != CCmode
13472 || XEXP (SET_SRC (set), 1) != const0_rtx))
13476 if (req_mode == CCGCmode)
13480 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13484 if (req_mode == CCZmode)
13495 gcc_unreachable ();
13498 return (GET_MODE (SET_SRC (set)) == set_mode);
13501 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0 and OP1 into the flags register and
   return the (CODE flags 0) rtx to be consumed by a branch, setcc or
   cmov.  */
13504 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13506 enum machine_mode cmpmode;
13509 cmpmode = SELECT_CC_MODE (code, op0, op1);
13510 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13512 /* This is very simple, but making the interface the same as in the
13513 FP case makes the rest of the code easier. */
13514 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13515 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13517 /* Return the test that should be put into the flags user, i.e.
13518 the bcc, scc, or cmov instruction. */
13519 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13522 /* Figure out whether to use ordered or unordered fp comparisons.
13523 Return the appropriate mode to use. */
/* Return the CC mode to use for FP comparisons: the non-trapping
   unordered mode (CCFPUmode) under -mieee-fp, CCFPmode otherwise.  */
13526 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13528 /* ??? In order to make all comparisons reversible, we do all comparisons
13529 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13530 all forms trapping and nontrapping comparisons, we can make inequality
13531 comparisons trapping again, since it results in better code when using
13532 FCOM based compares. */
13533 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the most specific CC mode for comparison CODE of OP0 and OP1
   (implements SELECT_CC_MODE).  More specific modes let the combiner
   merge flag-setting arithmetic with the compare.  NOTE(review): the
   return statements for each case are elided from this listing.  */
13537 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13539 enum machine_mode mode = GET_MODE (op0);
13541 if (SCALAR_FLOAT_MODE_P (mode))
13543 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13544 return ix86_fp_compare_mode (code);
13549 /* Only zero flag is needed. */
13550 case EQ: /* ZF=0 */
13551 case NE: /* ZF!=0 */
13553 /* Codes needing carry flag. */
13554 case GEU: /* CF=0 */
13555 case LTU: /* CF=1 */
13556 /* Detect overflow checks. They need just the carry flag. */
13557 if (GET_CODE (op0) == PLUS
13558 && rtx_equal_p (op1, XEXP (op0, 0)))
13562 case GTU: /* CF=0 & ZF=0 */
13563 case LEU: /* CF=1 | ZF=1 */
13564 /* Detect overflow checks. They need just the carry flag. */
13565 if (GET_CODE (op0) == MINUS
13566 && rtx_equal_p (op1, XEXP (op0, 0)))
13570 /* Codes possibly doable only with sign flag when
13571 comparing against zero. */
13572 case GE: /* SF=OF or SF=0 */
13573 case LT: /* SF<>OF or SF=1 */
13574 if (op1 == const0_rtx)
13577 /* For other cases Carry flag is not required. */
13579 /* Codes doable only with sign flag when comparing
13580 against zero, but we miss jump instruction for it
13581 so we need to use relational tests against overflow
13582 that thus needs to be zero. */
13583 case GT: /* ZF=0 & SF=OF */
13584 case LE: /* ZF=1 | SF<>OF */
13585 if (op1 == const0_rtx)
13589 /* strcmp pattern do (use flags) and combine may ask us for proper
13594 gcc_unreachable ();
13598 /* Return the fixed registers used for condition codes. */
/* Target hook: report the (fixed) condition code registers via *P1 and
   *P2.  NOTE(review): the function body is not visible in this elided
   listing; presumably it stores FLAGS_REG/FPSR_REG — confirm against
   the full source.  */
13601 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13608 /* If two condition code modes are compatible, return a condition code
13609 mode which is compatible with both. Otherwise, return
/* Target hook: given two CC modes, return a mode compatible with both,
   or (in lines elided here) VOIDmode when incompatible.  CCGCmode and
   CCGOCmode merge to their common subset.  */
13612 static enum machine_mode
13613 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13618 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13621 if ((m1 == CCGCmode && m2 == CCGOCmode)
13622 || (m1 == CCGOCmode && m2 == CCGCmode))
13628 gcc_unreachable ();
13658 /* These are only compatible with themselves, which we already
13664 /* Split comparison code CODE into comparisons we can do using branch
13665 instructions. BYPASS_CODE is comparison code for branch that will
13666 branch around FIRST_CODE and SECOND_CODE. If some of branches
13667 is not required, set value to UNKNOWN.
13668 We never require more than two branches. */
/* Decompose FP comparison CODE into up to two branchable codes
   (*FIRST_CODE, *SECOND_CODE) plus an optional *BYPASS_CODE that
   branches around them; unused slots are set to UNKNOWN.  Without
   -mieee-fp the unordered fixups are dropped entirely.  */
13671 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13672 enum rtx_code *first_code,
13673 enum rtx_code *second_code)
13675 *first_code = code;
13676 *bypass_code = UNKNOWN;
13677 *second_code = UNKNOWN;
13679 /* The fcomi comparison sets flags as follows:
/* These codes are directly expressible from the flags fcomi sets.  */
13689 case GT: /* GTU - CF=0 & ZF=0 */
13690 case GE: /* GEU - CF=0 */
13691 case ORDERED: /* PF=0 */
13692 case UNORDERED: /* PF=1 */
13693 case UNEQ: /* EQ - ZF=1 */
13694 case UNLT: /* LTU - CF=1 */
13695 case UNLE: /* LEU - CF=1 | ZF=1 */
13696 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on unordered inputs: test the
   unordered form, bypassing when PF says the operands are unordered.  */
13698 case LT: /* LTU - CF=1 - fails on unordered */
13699 *first_code = UNLT;
13700 *bypass_code = UNORDERED;
13702 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13703 *first_code = UNLE;
13704 *bypass_code = UNORDERED;
13706 case EQ: /* EQ - ZF=1 - fails on unordered */
13707 *first_code = UNEQ;
13708 *bypass_code = UNORDERED;
13710 case NE: /* NE - ZF=0 - fails on unordered */
13711 *first_code = LTGT;
13712 *second_code = UNORDERED;
13714 case UNGE: /* GEU - CF=0 - fails on unordered */
13716 *second_code = UNORDERED;
13718 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13720 *second_code = UNORDERED;
13723 gcc_unreachable ();
/* Non-IEEE mode: comparisons need not be unordered-correct, so the
   extra branches are unnecessary.  */
13725 if (!TARGET_IEEE_FP)
13727 *second_code = UNKNOWN;
13728 *bypass_code = UNKNOWN;
13732 /* Return cost of comparison done fcom + arithmetics operations on AX.
13733 All following functions do use number of instructions as a cost metrics.
13734 In future this should be tweaked to compute bytes for optimize_size and
13735 take into account performance of various instructions on various CPUs. */
13737 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE math never needs the NaN-aware sequences, so the cheap
   answer applies for every code.  */
13739 if (!TARGET_IEEE_FP)
13741 /* The cost of code output by ix86_expand_fp_compare. */
13765 gcc_unreachable ();
13769 /* Return cost of comparison done using fcomi operation.
13770 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13772 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13774 enum rtx_code bypass_code, first_code, second_code;
13775 /* Return arbitrarily high cost when instruction is not supported - this
13776 prevents gcc from using it. */
13779 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2, plus 1 when CODE needs a bypass or second branch.  */
13780 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13783 /* Return cost of comparison done using sahf operation.
13784 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13786 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13788 enum rtx_code bypass_code, first_code, second_code;
13789 /* Return arbitrarily high cost when instruction is not preferred - this
13790 avoids gcc from using it. */
/* sahf is only usable when available (TARGET_SAHF) and either tuned for
   (TARGET_USE_SAHF) or when optimizing this insn for size.  */
13791 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13793 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (one more than fcomi), plus 1 for an extra branch.  */
13794 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13797 /* Compute cost of the comparison done using any method.
13798 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13800 ix86_fp_comparison_cost (enum rtx_code code)
13802 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13805 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13806 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three strategies: arithmetics on AX,
   fnstsw+sahf, and fcomi (unsupported ones report cost 1024).  */
13808 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13809 if (min > sahf_cost)
13811 if (min > fcomi_cost)
13816 /* Return true if we should use an FCOMI instruction for this
13820 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13822 enum rtx_code swapped_code = swap_condition (code);
/* fcomi wins if it is the cheapest strategy for either operand order,
   since ix86_prepare_fp_compare_args may later swap the operands.  */
13824 return ((ix86_fp_comparison_cost (code)
13825 == ix86_fp_comparison_fcomi_cost (code))
13826 || (ix86_fp_comparison_cost (swapped_code)
13827 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13830 /* Swap, force into registers, or otherwise massage the two operands
13831 to a fp comparison. The operands are updated in place; the new
13832 comparison code is returned. */
13834 static enum rtx_code
13835 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13837 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13838 rtx op0 = *pop0, op1 = *pop1;
13839 enum machine_mode op_mode = GET_MODE (op0);
13840 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13842 /* All of the unordered compare instructions only work on registers.
13843 The same is true of the fcomi compare instructions. The XFmode
13844 compare instructions require registers except when comparing
13845 against zero or when converting operand 1 from fixed point to
13849 && (fpcmp_mode == CCFPUmode
13850 || (op_mode == XFmode
13851 && ! (standard_80387_constant_p (op0) == 1
13852 || standard_80387_constant_p (op1) == 1)
13853 && GET_CODE (op1) != FLOAT)
13854 || ix86_use_fcomi_compare (code)))
13856 op0 = force_reg (op_mode, op0);
13857 op1 = force_reg (op_mode, op1);
13861 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13862 things around if they appear profitable, otherwise force op0
13863 into a register. */
/* standard_80387_constant_p == 0 means the operand is a constant that
   cannot be loaded with a single fldz/fld1-style insn, so keeping it
   as op0 would be expensive.  */
13865 if (standard_80387_constant_p (op0) == 0
13867 && ! (standard_80387_constant_p (op1) == 0
13871 tmp = op0, op0 = op1, op1 = tmp;
13872 code = swap_condition (code);
13876 op0 = force_reg (op_mode, op0);
13878 if (CONSTANT_P (op1))
13880 int tmp = standard_80387_constant_p (op1);
/* Constants the 387 cannot materialize directly are spilled to the
   constant pool and compared from memory.  */
13882 op1 = validize_mem (force_const_mem (op_mode, op1));
13886 op1 = force_reg (op_mode, op1);
13889 op1 = force_reg (op_mode, op1);
13893 /* Try to rearrange the comparison to make it cheaper. */
13894 if (ix86_fp_comparison_cost (code)
13895 > ix86_fp_comparison_cost (swap_condition (code))
13896 && (REG_P (op1) || can_create_pseudo_p ()))
13899 tmp = op0, op0 = op1, op1 = tmp;
13900 code = swap_condition (code);
13902 op0 = force_reg (op_mode, op0);
13910 /* Convert comparison codes we use to represent FP comparison to integer
13911 code that will result in proper branch. Return UNKNOWN if no such code
13915 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): maps the unordered FP codes produced after
   fnstsw/sahf onto plain (unsigned) integer condition codes -- verify
   the mapping table against the flag semantics in
   ix86_fp_comparison_codes.  */
13944 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13947 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13948 rtx *second_test, rtx *bypass_test)
13950 enum machine_mode fpcmp_mode, intcmp_mode;
13952 int cost = ix86_fp_comparison_cost (code);
13953 enum rtx_code bypass_code, first_code, second_code;
13955 fpcmp_mode = ix86_fp_compare_mode (code);
13956 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13959 *second_test = NULL_RTX;
13961 *bypass_test = NULL_RTX;
13963 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13965 /* Do fcomi/sahf based test when profitable. */
/* Only take this path if the caller gave us somewhere to put the
   extra bypass/second tests the code may require.  */
13966 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13967 && (bypass_code == UNKNOWN || bypass_test)
13968 && (second_code == UNKNOWN || second_test))
13970 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13971 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13977 gcc_assert (TARGET_SAHF);
/* The sahf variant needs an HImode scratch to hold the FP status
   word; clobber it in the same parallel as the compare.  */
13980 scratch = gen_reg_rtx (HImode);
13981 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13983 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13986 /* The FP codes work out to act like unsigned. */
13987 intcmp_mode = fpcmp_mode;
13989 if (bypass_code != UNKNOWN)
13990 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13991 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13993 if (second_code != UNKNOWN)
13994 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13995 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14000 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14001 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14002 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14004 scratch = gen_reg_rtx (HImode);
14005 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2))
14007 /* In the unordered case, we have to check C2 for NaN's, which
14008 doesn't happen to work out to anything nice combination-wise.
14009 So do some bit twiddling on the value we've got in AH to come
14010 up with an appropriate set of condition codes. */
/* The 0x45/0x44/0x40/0x05/0x04/0x01 masks below select the FP status
   word condition bits once they sit in AH -- presumably C0/C2/C3;
   verify against the Intel fnstsw layout.  */
14012 intcmp_mode = CCNOmode;
14017 if (code == GT || !TARGET_IEEE_FP)
14019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14024 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14025 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14026 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14027 intcmp_mode = CCmode;
14033 if (code == LT && TARGET_IEEE_FP)
14035 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14036 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14037 intcmp_mode = CCmode;
14042 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14048 if (code == GE || !TARGET_IEEE_FP)
14050 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14055 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14056 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14063 if (code == LE && TARGET_IEEE_FP)
14065 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14066 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14067 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14068 intcmp_mode = CCmode;
14073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14079 if (code == EQ && TARGET_IEEE_FP)
14081 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14082 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14083 intcmp_mode = CCmode;
14088 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14095 if (code == NE && TARGET_IEEE_FP)
14097 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14098 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14104 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14110 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14114 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14119 gcc_unreachable ();
14123 /* Return the test that should be put into the flags user, i.e.
14124 the bcc, scc, or cmov instruction. */
14125 return gen_rtx_fmt_ee (code, VOIDmode,
14126 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 and return the
   comparison rtx for the flags user.  If a flags result was already
   emitted (ix86_compare_emitted), reuse it instead of emitting a new
   compare.  */
14131 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14134 op0 = ix86_compare_op0;
14135 op1 = ix86_compare_op1;
14138 *second_test = NULL_RTX;
14140 *bypass_test = NULL_RTX;
14142 if (ix86_compare_emitted)
/* Consume the previously emitted flags value; clear the global so it
   is not reused by accident.  */
14144 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14145 ix86_compare_emitted = NULL_RTX;
14147 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14149 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14150 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14151 second_test, bypass_test);
14154 ret = ix86_expand_int_compare (code, op0, op1);
14159 /* Return true if the CODE will result in nontrivial jump sequence. */
14161 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14163 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means more than one branch: either a bypass branch around
   the main test or a second follow-up branch is required.  */
14166 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14167 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE (comparing the global
   ix86_compare_op0/op1) jumping to LABEL when true.  */
14171 ix86_expand_branch (enum rtx_code code, rtx label)
14175 /* If we have emitted a compare insn, go straight to simple.
14176 ix86_expand_compare won't emit anything if ix86_compare_emitted
14178 if (ix86_compare_emitted)
14181 switch (GET_MODE (ix86_compare_op0))
14187 tmp = ix86_expand_compare (code, NULL, NULL);
14188 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14189 gen_rtx_LABEL_REF (VOIDmode, label),
14191 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14200 enum rtx_code bypass_code, first_code, second_code;
14202 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14203 &ix86_compare_op1);
14205 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14207 /* Check whether we will use the natural sequence with one jump. If
14208 so, we can expand jump early. Otherwise delay expansion by
14209 creating compound insn to not confuse optimizers. */
14210 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14212 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14213 gen_rtx_LABEL_REF (VOIDmode, label),
14214 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: emit one compound jump insn (with FPSR/flags
   clobbers, and a scratch when not using fcomi) to be split later.  */
14218 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14219 ix86_compare_op0, ix86_compare_op1);
14220 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14221 gen_rtx_LABEL_REF (VOIDmode, label),
14223 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14225 use_fcomi = ix86_use_fcomi_compare (code);
14226 vec = rtvec_alloc (3 + !use_fcomi);
14227 RTVEC_ELT (vec, 0) = tmp;
14229 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14231 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14234 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14236 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14245 /* Expand DImode branch into multiple compare+branch. */
14247 rtx lo[2], hi[2], label2;
14248 enum rtx_code code1, code2, code3;
14249 enum machine_mode submode;
/* Canonicalize so a constant, if any, is the second operand.  */
14251 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14253 tmp = ix86_compare_op0;
14254 ix86_compare_op0 = ix86_compare_op1;
14255 ix86_compare_op1 = tmp;
14256 code = swap_condition (code);
14258 if (GET_MODE (ix86_compare_op0) == DImode)
14260 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14261 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14266 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14267 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14271 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14272 avoid two branches. This costs one extra insn, so disable when
14273 optimizing for size. */
14275 if ((code == EQ || code == NE)
14276 && (!optimize_insn_for_size_p ()
14277 || hi[1] == const0_rtx || lo[1] == const0_rtx)
14282 if (hi[1] != const0_rtx)
14283 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14284 NULL_RTX, 0, OPTAB_WIDEN);
14287 if (lo[1] != const0_rtx)
14288 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14289 NULL_RTX, 0, OPTAB_WIDEN);
14291 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14292 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: now it is a word-mode compare of tmp against zero.  */
14294 ix86_compare_op0 = tmp;
14295 ix86_compare_op1 = const0_rtx;
14296 ix86_expand_branch (code, label);
14300 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14301 op1 is a constant and the low word is zero, then we can just
14302 examine the high word. Similarly for low word -1 and
14303 less-or-equal-than or greater-than. */
14305 if (CONST_INT_P (hi[1]))
14308 case LT: case LTU: case GE: case GEU:
14309 if (lo[1] == const0_rtx)
14311 ix86_compare_op0 = hi[0];
14312 ix86_compare_op1 = hi[1];
14313 ix86_expand_branch (code, label);
14317 case LE: case LEU: case GT: case GTU:
14318 if (lo[1] == constm1_rtx)
14320 ix86_compare_op0 = hi[0];
14321 ix86_compare_op1 = hi[1];
14322 ix86_expand_branch (code, label);
14330 /* Otherwise, we need two or three jumps. */
14332 label2 = gen_label_rtx ();
14335 code2 = swap_condition (code);
14336 code3 = unsigned_condition (code);
14340 case LT: case GT: case LTU: case GTU:
14343 case LE: code1 = LT; code2 = GT; break;
14344 case GE: code1 = GT; code2 = LT; break;
14345 case LEU: code1 = LTU; code2 = GTU; break;
14346 case GEU: code1 = GTU; code2 = LTU; break;
14348 case EQ: code1 = UNKNOWN; code2 = NE; break;
14349 case NE: code2 = UNKNOWN; break;
14352 gcc_unreachable ();
14357 * if (hi(a) < hi(b)) goto true;
14358 * if (hi(a) > hi(b)) goto false;
14359 * if (lo(a) < lo(b)) goto true;
/* First decide on the high words (code1/code2), then fall through to
   an unsigned compare (code3) of the low words.  */
14363 ix86_compare_op0 = hi[0];
14364 ix86_compare_op1 = hi[1];
14366 if (code1 != UNKNOWN)
14367 ix86_expand_branch (code1, label);
14368 if (code2 != UNKNOWN)
14369 ix86_expand_branch (code2, label2);
14371 ix86_compare_op0 = lo[0];
14372 ix86_compare_op1 = lo[1];
14373 ix86_expand_branch (code3, label);
14375 if (code2 != UNKNOWN)
14376 emit_label (label2);
14381 gcc_unreachable ();
14385 /* Split branch based on floating point condition. */
14387 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14388 rtx target1, rtx target2, rtx tmp, rtx pushed)
14390 rtx second, bypass;
14391 rtx label = NULL_RTX;
14393 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge (target2) is pc_rtx, reversing
   the condition when needed.  */
14396 if (target2 != pc_rtx)
14399 code = reverse_condition_maybe_unordered (code);
14404 condition = ix86_expand_fp_compare (code, op1, op2,
14405 tmp, &second, &bypass);
14407 /* Remove pushed operand from stack. */
14409 ix86_free_from_memory (GET_MODE (pushed));
14411 if (split_branch_probability >= 0)
14413 /* Distribute the probabilities across the jumps.
14414 Assume the BYPASS and SECOND to be always test
14416 probability = split_branch_probability;
14418 /* Value of 1 is low enough to make no need for probability
14419 to be updated. Later we may run some experiments and see
14420 if unordered values are more frequent in practice. */
14422 bypass_probability = 1;
14424 second_probability = 1;
/* The bypass branch jumps over the main (and second) tests to a local
   label when the operands are unordered.  */
14426 if (bypass != NULL_RTX)
14428 label = gen_label_rtx ();
14429 i = emit_jump_insn (gen_rtx_SET
14431 gen_rtx_IF_THEN_ELSE (VOIDmode,
14433 gen_rtx_LABEL_REF (VOIDmode,
14436 if (bypass_probability >= 0)
14438 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14439 GEN_INT (bypass_probability),
/* Main conditional jump to target1.  */
14442 i = emit_jump_insn (gen_rtx_SET
14444 gen_rtx_IF_THEN_ELSE (VOIDmode,
14445 condition, target1, target2)));
14446 if (probability >= 0)
14448 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14449 GEN_INT (probability),
/* Optional second jump handles the part of the condition the main
   test could not express (e.g. the unordered case).  */
14451 if (second != NULL_RTX)
14453 i = emit_jump_insn (gen_rtx_SET
14455 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14457 if (second_probability >= 0)
14459 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14460 GEN_INT (second_probability),
14463 if (label != NULL_RTX)
14464 emit_label (label);
/* Expand a setcc of CODE into QImode register DEST.  Returns 1 on
   success, 0 when the expander must FAIL (double-word compares).  */
14468 ix86_expand_setcc (enum rtx_code code, rtx dest)
14470 rtx ret, tmp, tmpreg, equiv;
14471 rtx second_test, bypass_test;
14473 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14474 return 0; /* FAIL */
14476 gcc_assert (GET_MODE (dest) == QImode);
14478 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14479 PUT_MODE (ret, QImode);
14484 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When the FP comparison needed two flag tests, set a second QImode
   value and combine: AND for a second test, OR (of the reversed test)
   for a bypass test.  */
14485 if (bypass_test || second_test)
14487 rtx test = second_test;
14489 rtx tmp2 = gen_reg_rtx (QImode);
14492 gcc_assert (!second_test);
14493 test = bypass_test;
/* The bypass case is folded in by reversing its condition.  */
14495 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14497 PUT_MODE (test, QImode);
14498 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14501 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14503 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14506 /* Attach a REG_EQUAL note describing the comparison result. */
14507 if (ix86_compare_op0 && ix86_compare_op1)
14509 equiv = simplify_gen_relational (code, QImode,
14510 GET_MODE (ix86_compare_op0),
14511 ix86_compare_op0, ix86_compare_op1);
14512 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14515 return 1; /* DONE */
14518 /* Expand comparison setting or clearing carry flag. Return true when
14519 successful and set pop for the operation. */
14521 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14523 enum machine_mode mode =
14524 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14526 /* Do not handle DImode compares that go through special path. */
14527 if (mode == (TARGET_64BIT ? TImode : DImode))
14530 if (SCALAR_FLOAT_MODE_P (mode))
14532 rtx second_test = NULL, bypass_test = NULL;
14533 rtx compare_op, compare_seq;
14535 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14537 /* Shortcut: following common codes never translate
14538 into carry flag compares. */
14539 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14540 || code == ORDERED || code == UNORDERED)
14543 /* These comparisons require zero flag; swap operands so they won't. */
14544 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14545 && !TARGET_IEEE_FP)
14550 code = swap_condition (code);
14553 /* Try to expand the comparison and verify that we end up with
14554 carry flag based comparison. This fails to be true only when
14555 we decide to expand comparison using arithmetic that is not
14556 too common scenario. */
14558 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14559 &second_test, &bypass_test);
14560 compare_seq = get_insns ();
/* Extra flag tests would mean the result is not a single carry-flag
   compare -- give up in that case.  */
14563 if (second_test || bypass_test)
14566 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14567 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14568 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14570 code = GET_CODE (compare_op);
/* Only LTU/GEU are pure carry-flag conditions.  */
14572 if (code != LTU && code != GEU)
14575 emit_insn (compare_seq);
14580 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 so the condition becomes LTU or GEU
   (i.e. a plain carry-flag test after the subtraction).  */
14589 /* Convert a==0 into (unsigned)a<1. */
14592 if (op1 != const0_rtx)
14595 code = (code == EQ ? LTU : GEU);
14598 /* Convert a>b into b<a or a>=b-1. */
14601 if (CONST_INT_P (op1))
14603 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14604 /* Bail out on overflow. We still can swap operands but that
14605 would force loading of the constant into register. */
14606 if (op1 == const0_rtx
14607 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14609 code = (code == GTU ? GEU : LTU);
14616 code = (code == GTU ? LTU : GEU);
14620 /* Convert a>=0 into (unsigned)a<0x80000000. */
14623 if (mode == DImode || op1 != const0_rtx)
14625 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14626 code = (code == LT ? GEU : LTU);
14630 if (mode == DImode || op1 != constm1_rtx)
14632 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14633 code = (code == LE ? GEU : LTU);
14639 /* Swapping operands may cause constant to appear as first operand. */
14640 if (!nonimmediate_operand (op0, VOIDmode))
14642 if (!can_create_pseudo_p ())
14644 op0 = force_reg (mode, op0);
14646 ix86_compare_op0 = op0;
14647 ix86_compare_op1 = op1;
14648 *pop = ix86_expand_compare (code, NULL, NULL);
14649 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1]
   (a comparison of the global ix86_compare_op0/op1) ? operands[2]
   : operands[3].  Returns 1 on success (DONE), 0 to FAIL.  */
14654 ix86_expand_int_movcc (rtx operands[])
14656 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14657 rtx compare_seq, compare_op;
14658 rtx second_test, bypass_test;
14659 enum machine_mode mode = GET_MODE (operands[0]);
14660 bool sign_bit_compare_p = false;
14663 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14664 compare_seq = get_insns ();
14667 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 and x > -1 / x <= -1 test only the sign bit.  */
14669 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14670 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14671 sign_bit_compare_p = true;
14673 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14674 HImode insns, we'd be swallowed in word prefix ops. */
14676 if ((mode != HImode || TARGET_FAST_PREFIX)
14677 && (mode != (TARGET_64BIT ? TImode : DImode))
14678 && CONST_INT_P (operands[2])
14679 && CONST_INT_P (operands[3]))
14681 rtx out = operands[0];
14682 HOST_WIDE_INT ct = INTVAL (operands[2]);
14683 HOST_WIDE_INT cf = INTVAL (operands[3]);
14684 HOST_WIDE_INT diff;
14687 /* Sign bit compares are better done using shifts than we do by using
14689 if (sign_bit_compare_p
14690 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14691 ix86_compare_op1, &compare_op))
14693 /* Detect overlap between destination and compare sources. */
14696 if (!sign_bit_compare_p)
14698 bool fpcmp = false;
14700 compare_code = GET_CODE (compare_op);
14702 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14703 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14706 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14709 /* To simplify rest of code, restrict to the GEU case. */
14710 if (compare_code == LTU)
14712 HOST_WIDE_INT tmp = ct;
14715 compare_code = reverse_condition (compare_code);
14716 code = reverse_condition (code);
14721 PUT_CODE (compare_op,
14722 reverse_condition_maybe_unordered
14723 (GET_CODE (compare_op)));
14725 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14729 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14730 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14731 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag with sbb.  */
14733 if (mode == DImode)
14734 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14736 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14740 if (code == GT || code == GE)
14741 code = reverse_condition (code);
14744 HOST_WIDE_INT tmp = ct;
14749 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14750 ix86_compare_op1, VOIDmode, 0, -1);
14763 tmp = expand_simple_binop (mode, PLUS,
14765 copy_rtx (tmp), 1, OPTAB_DIRECT);
14776 tmp = expand_simple_binop (mode, IOR,
14778 copy_rtx (tmp), 1, OPTAB_DIRECT);
14780 else if (diff == -1 && ct)
14790 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14792 tmp = expand_simple_binop (mode, PLUS,
14793 copy_rtx (tmp), GEN_INT (cf),
14794 copy_rtx (tmp), 1, OPTAB_DIRECT);
14802 * andl cf - ct, dest
14812 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14815 tmp = expand_simple_binop (mode, AND,
14817 gen_int_mode (cf - ct, mode),
14818 copy_rtx (tmp), 1, OPTAB_DIRECT);
14820 tmp = expand_simple_binop (mode, PLUS,
14821 copy_rtx (tmp), GEN_INT (ct),
14822 copy_rtx (tmp), 1, OPTAB_DIRECT);
14825 if (!rtx_equal_p (tmp, out))
14826 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14828 return 1; /* DONE */
14833 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14836 tmp = ct, ct = cf, cf = tmp;
14839 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14841 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14843 /* We may be reversing unordered compare to normal compare, that
14844 is not valid in general (we may convert non-trapping condition
14845 to trapping one), however on i386 we currently emit all
14846 comparisons unordered. */
14847 compare_code = reverse_condition_maybe_unordered (compare_code);
14848 code = reverse_condition_maybe_unordered (code);
14852 compare_code = reverse_condition (compare_code);
14853 code = reverse_condition (code);
14857 compare_code = UNKNOWN;
14858 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14859 && CONST_INT_P (ix86_compare_op1))
14861 if (ix86_compare_op1 == const0_rtx
14862 && (code == LT || code == GE))
14863 compare_code = code;
14864 else if (ix86_compare_op1 == constm1_rtx)
14868 else if (code == GT)
14873 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14874 if (compare_code != UNKNOWN
14875 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14876 && (cf == -1 || ct == -1))
14878 /* If lea code below could be used, only optimize
14879 if it results in a 2 insn sequence. */
14881 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14882 || diff == 3 || diff == 5 || diff == 9)
14883 || (compare_code == LT && ct == -1)
14884 || (compare_code == GE && cf == -1))
14887 * notl op1 (if necessary)
14895 code = reverse_condition (code);
14898 out = emit_store_flag (out, code, ix86_compare_op0,
14899 ix86_compare_op1, VOIDmode, 0, -1);
14901 out = expand_simple_binop (mode, IOR,
14903 out, 1, OPTAB_DIRECT);
14904 if (out != operands[0])
14905 emit_move_insn (operands[0], out);
14907 return 1; /* DONE */
/* diff of 1,2,4,8 fits a lea scale; 3,5,9 are base+scaled-index.  */
14912 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14913 || diff == 3 || diff == 5 || diff == 9)
14914 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14916 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14922 * lea cf(dest*(ct-cf)),dest
14926 * This also catches the degenerate setcc-only case.
14932 out = emit_store_flag (out, code, ix86_compare_op0,
14933 ix86_compare_op1, VOIDmode, 0, 1);
14936 /* On x86_64 the lea instruction operates on Pmode, so we need
14937 to get arithmetics done in proper mode to match. */
14939 tmp = copy_rtx (out);
14943 out1 = copy_rtx (out);
14944 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14948 tmp = gen_rtx_PLUS (mode, tmp, out1);
14954 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14957 if (!rtx_equal_p (tmp, out))
14960 out = force_operand (tmp, copy_rtx (out));
14962 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14964 if (!rtx_equal_p (out, operands[0]))
14965 emit_move_insn (operands[0], copy_rtx (out));
14967 return 1; /* DONE */
14971 * General case: Jumpful:
14972 * xorl dest,dest cmpl op1, op2
14973 * cmpl op1, op2 movl ct, dest
14974 * setcc dest jcc 1f
14975 * decl dest movl cf, dest
14976 * andl (cf-ct),dest 1:
14979 * Size 20. Size 14.
14981 * This is reasonably steep, but branch mispredict costs are
14982 * high on modern cpus, so consider failing only if optimizing
14986 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14987 && BRANCH_COST (optimize_insn_for_speed_p (),
14992 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14997 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14999 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15001 /* We may be reversing unordered compare to normal compare,
15002 that is not valid in general (we may convert non-trapping
15003 condition to trapping one), however on i386 we currently
15004 emit all comparisons unordered. */
15005 code = reverse_condition_maybe_unordered (code);
15009 code = reverse_condition (code);
15010 if (compare_code != UNKNOWN)
15011 compare_code = reverse_condition (compare_code);
15015 if (compare_code != UNKNOWN)
15017 /* notl op1 (if needed)
15022 For x < 0 (resp. x <= -1) there will be no notl,
15023 so if possible swap the constants to get rid of the
15025 True/false will be -1/0 while code below (store flag
15026 followed by decrement) is 0/-1, so the constants need
15027 to be exchanged once more. */
15029 if (compare_code == GE || !cf)
15031 code = reverse_condition (code);
15036 HOST_WIDE_INT tmp = cf;
15041 out = emit_store_flag (out, code, ix86_compare_op0,
15042 ix86_compare_op1, VOIDmode, 0, -1);
15046 out = emit_store_flag (out, code, ix86_compare_op0,
15047 ix86_compare_op1, VOIDmode, 0, 1);
15049 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15050 copy_rtx (out), 1, OPTAB_DIRECT);
15053 out = expand_simple_binop (mode, AND, copy_rtx (out),
15054 gen_int_mode (cf - ct, mode),
15055 copy_rtx (out), 1, OPTAB_DIRECT);
15057 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15058 copy_rtx (out), 1, OPTAB_DIRECT);
15059 if (!rtx_equal_p (out, operands[0]))
15060 emit_move_insn (operands[0], copy_rtx (out));
15062 return 1; /* DONE */
15066 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15068 /* Try a few things more with specific constants and a variable. */
15071 rtx var, orig_out, out, tmp;
15073 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15074 return 0; /* FAIL */
15076 /* If one of the two operands is an interesting constant, load a
15077 constant with the above and mask it in with a logical operation. */
15079 if (CONST_INT_P (operands[2]))
/* Guard against no-op rewrites (operands[3] already the constant we
   would set), which would recurse forever below.  */
15082 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15083 operands[3] = constm1_rtx, op = and_optab;
15084 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15085 operands[3] = const0_rtx, op = ior_optab;
15087 return 0; /* FAIL */
15089 else if (CONST_INT_P (operands[3]))
15092 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15093 operands[2] = constm1_rtx, op = and_optab;
/* Fixed: guard must test operands[2] (the operand being replaced),
   mirroring the CONST_INT_P (operands[2]) branch above; the old
   operands[3] != const0_rtx test was vacuously true here since
   operands[3] is constm1_rtx.  */
15094 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15095 operands[2] = const0_rtx, op = ior_optab;
15097 return 0; /* FAIL */
15100 return 0; /* FAIL */
15102 orig_out = operands[0];
15103 tmp = gen_reg_rtx (mode);
15106 /* Recurse to get the constant loaded. */
15107 if (ix86_expand_int_movcc (operands) == 0)
15108 return 0; /* FAIL */
15110 /* Mask in the interesting variable. */
15111 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15113 if (!rtx_equal_p (out, orig_out))
15114 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15116 return 1; /* DONE */
15120 * For comparison with above,
15130 if (! nonimmediate_operand (operands[2], mode))
15131 operands[2] = force_reg (mode, operands[2]);
15132 if (! nonimmediate_operand (operands[3], mode))
15133 operands[3] = force_reg (mode, operands[3]);
/* Extra flag tests read operands after the first cmov writes the
   destination, so copy overlapping sources out of the way first.  */
15135 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15137 rtx tmp = gen_reg_rtx (mode);
15138 emit_move_insn (tmp, operands[3]);
15141 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15143 rtx tmp = gen_reg_rtx (mode);
15144 emit_move_insn (tmp, operands[2]);
15148 if (! register_operand (operands[2], VOIDmode)
15150 || ! register_operand (operands[3], VOIDmode)))
15151 operands[2] = force_reg (mode, operands[2]);
15154 && ! register_operand (operands[3], VOIDmode))
15155 operands[3] = force_reg (mode, operands[3]);
15157 emit_insn (compare_seq);
15158 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15159 gen_rtx_IF_THEN_ELSE (mode,
15160 compare_op, operands[2],
15163 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15164 gen_rtx_IF_THEN_ELSE (mode,
15166 copy_rtx (operands[3]),
15167 copy_rtx (operands[0]))));
15169 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15170 gen_rtx_IF_THEN_ELSE (mode,
15172 copy_rtx (operands[2]),
15173 copy_rtx (operands[0]))));
15175 return 1; /* DONE */
15178 /* Swap, force into registers, or otherwise massage the two operands
15179 to an sse comparison with a mask result. Thus we differ a bit from
15180 ix86_prepare_fp_compare_args which expects to produce a flags result.
15182 The DEST operand exists to help determine whether to commute commutative
15183 operators. The POP0/POP1 operands are updated in place. The new
15184 comparison code is returned, or UNKNOWN if not implementable. */
15186 static enum rtx_code
15187 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15188 rtx *pop0, rtx *pop1)
15196 /* We have no LTGT as an operator. We could implement it with
15197 NE & ORDERED, but this requires an extra temporary. It's
15198 not clear that it's worth it. */
15205 /* These are supported directly. */
15212 /* For commutative operators, try to canonicalize the destination
15213 operand to be first in the comparison - this helps reload to
15214 avoid extra moves. */
15215 if (!dest || !rtx_equal_p (dest, *pop1))
15223 /* These are not supported directly. Swap the comparison operands
15224 to transform into something that is supported. */
15228 code = swap_condition (code);
15232 gcc_unreachable ();
15238 /* Detect conditional moves that exactly match min/max operational
15239 semantics. Note that this is IEEE safe, as long as we don't
15240 interchange the operands.
15242 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15243 and TRUE if the operation is successful and instructions are emitted. */
15246 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15247 rtx cmp_op1, rtx if_true, rtx if_false)
15249 enum machine_mode mode;
/* NOTE(review): interior lines are elided in this excerpt (including the
   initial normalization of CODE and the is_min computation).  */
15255 else if (code == UNGE)
15258 if_true = if_false;
/* The cmove matches a min/max only when the compared values are exactly
   the values being selected, in either order.  */
15264 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15266 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15271 mode = GET_MODE (dest);
15273 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15274 but MODE may be a vector mode and thus not appropriate. */
15275 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the pair in an UNSPEC so operand order (and thus
   NaN / signed-zero behavior of the hardware min/max insn) is preserved.  */
15277 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15280 if_true = force_reg (mode, if_true);
15281 v = gen_rtvec (2, if_true, if_false);
15282 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX rtx is sufficient.  */
15286 code = is_min ? SMIN : SMAX;
15287 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15290 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15294 /* Expand an sse vector comparison. Return the register with the result. */
15297 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15298 rtx op_true, rtx op_false)
15300 enum machine_mode mode = GET_MODE (dest);
/* First operand must be a register; second may be a memory operand.  */
15303 cmp_op0 = force_reg (mode, cmp_op0);
15304 if (!nonimmediate_operand (cmp_op1, mode))
15305 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps any input, so the comparison
   result does not clobber values still needed by the caller's cmove.  */
15308 || reg_overlap_mentioned_p (dest, op_true)
15309 || reg_overlap_mentioned_p (dest, op_false))
15310 dest = gen_reg_rtx (mode);
/* Emit DEST = (cmp_op0 CODE cmp_op1) producing an all-ones/all-zeros mask.  */
15312 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15313 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15318 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15319 operations. This is used for both scalar and vector conditional moves. */
15322 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15324 enum machine_mode mode = GET_MODE (dest);
/* Special case: selecting against zero needs only a single AND (the CMP
   mask is all-ones where true, all-zeros where false).  */
15327 if (op_false == CONST0_RTX (mode))
15329 op_true = force_reg (mode, op_true);
15330 x = gen_rtx_AND (mode, cmp, op_true);
15331 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Dual special case: true-arm zero needs only ANDN.  */
15333 else if (op_true == CONST0_RTX (mode))
15335 op_false = force_reg (mode, op_false);
15336 x = gen_rtx_NOT (mode, cmp);
15337 x = gen_rtx_AND (mode, x, op_false);
15338 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) instruction.  */
15340 else if (TARGET_SSE5)
15342 rtx pcmov = gen_rtx_SET (mode, dest,
15343 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General fallback: DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP).  */
15350 op_true = force_reg (mode, op_true);
15351 op_false = force_reg (mode, op_false);
15353 t2 = gen_reg_rtx (mode);
15355 t3 = gen_reg_rtx (mode);
15359 x = gen_rtx_AND (mode, op_true, cmp);
15360 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15362 x = gen_rtx_NOT (mode, cmp);
15363 x = gen_rtx_AND (mode, x, op_false);
15364 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15366 x = gen_rtx_IOR (mode, t3, t2);
15367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15371 /* Expand a floating-point conditional move. Return true if successful. */
15374 ix86_expand_fp_movcc (rtx operands[])
15376 enum machine_mode mode = GET_MODE (operands[0]);
15377 enum rtx_code code = GET_CODE (operands[1]);
15378 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: expand to a mask compare plus logical-select instead of fcmov.  */
15380 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15382 enum machine_mode cmode;
15384 /* Since we've no cmove for sse registers, don't force bad register
15385 allocation just to gain access to it. Deny movcc when the
15386 comparison mode doesn't match the move mode. */
15387 cmode = GET_MODE (ix86_compare_op0);
15388 if (cmode == VOIDmode)
15389 cmode = GET_MODE (ix86_compare_op1);
15393 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15395 &ix86_compare_op1);
15396 if (code == UNKNOWN)
/* Prefer a direct min/max instruction when the cmove matches one.  */
15399 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15400 ix86_compare_op1, operands[2],
15404 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15405 ix86_compare_op1, operands[2], operands[3]);
15406 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15410 /* The floating point conditional move instructions don't directly
15411 support conditions resulting from a signed integer comparison. */
15413 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15415 /* The floating point conditional move instructions don't directly
15416 support signed integer comparisons. */
/* Materialize an unsupported condition into a byte via setcc, then
   re-compare that byte against zero to get an fcmov-compatible test.  */
15418 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15420 gcc_assert (!second_test && !bypass_test);
15421 tmp = gen_reg_rtx (QImode);
15422 ix86_expand_setcc (code, tmp);
15424 ix86_compare_op0 = tmp;
15425 ix86_compare_op1 = const0_rtx;
15426 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a scratch when the extra bypass/second test would
   read it after operands[0] has already been overwritten.  */
15428 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15430 tmp = gen_reg_rtx (mode);
15431 emit_move_insn (tmp, operands[3]);
15434 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15436 tmp = gen_reg_rtx (mode);
15437 emit_move_insn (tmp, operands[2]);
/* Main cmove, optionally followed by fixup cmoves for the bypass and
   second tests produced by ix86_expand_compare.  */
15441 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15442 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15443 operands[2], operands[3])));
15445 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15446 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15447 operands[3], operands[0])));
15449 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15450 gen_rtx_IF_THEN_ELSE (mode, second_test,
15451 operands[2], operands[0])));
15456 /* Expand a floating-point vector conditional move; a vcond operation
15457 rather than a movcc operation. */
15460 ix86_expand_fp_vcond (rtx operands[])
15462 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the comparison operands/code for SSE; bail out if the
   code has no SSE mask-compare form.  */
15465 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15466 &operands[4], &operands[5]);
15467 if (code == UNKNOWN)
/* Prefer a direct min/max instruction when possible.  */
15470 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15471 operands[5], operands[1], operands[2]))
/* Otherwise: mask compare followed by logical select.  */
15474 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15475 operands[1], operands[2]);
15476 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15480 /* Expand a signed/unsigned integral vector conditional move. */
15483 ix86_expand_int_vcond (rtx operands[])
15485 enum machine_mode mode = GET_MODE (operands[0]);
15486 enum rtx_code code = GET_CODE (operands[3]);
15487 bool negate = false;
15490 cop0 = operands[4];
15491 cop1 = operands[5];
15493 /* SSE5 supports all of the comparisons on all vector int types. */
15496 /* Canonicalize the comparison to EQ, GT, GTU. */
/* NOTE(review): case labels of this switch are elided in this excerpt;
   reversing sets NEGATE so the select arms are swapped below.  */
15507 code = reverse_condition (code);
15513 code = reverse_condition (code);
15519 code = swap_condition (code);
15520 x = cop0, cop0 = cop1, cop1 = x;
15524 gcc_unreachable ();
15527 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15528 if (mode == V2DImode)
15533 /* SSE4.1 supports EQ. */
15534 if (!TARGET_SSE4_1)
15540 /* SSE4.2 supports GT/GTU. */
15541 if (!TARGET_SSE4_2)
15546 gcc_unreachable ();
15550 /* Unsigned parallel compare is not supported by the hardware. Play some
15551 tricks to turn this into a signed comparison against 0. */
15554 cop0 = force_reg (mode, cop0);
15563 /* Perform a parallel modulo subtraction. */
15564 t1 = gen_reg_rtx (mode);
15565 emit_insn ((mode == V4SImode
15567 : gen_subv2di3) (t1, cop0, cop1));
15569 /* Extract the original sign bit of op0. */
15570 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15572 t2 = gen_reg_rtx (mode);
15573 emit_insn ((mode == V4SImode
15575 : gen_andv2di3) (t2, cop0, mask));
15577 /* XOR it back into the result of the subtraction. This results
15578 in the sign bit set iff we saw unsigned underflow. */
15579 x = gen_reg_rtx (mode);
15580 emit_insn ((mode == V4SImode
15582 : gen_xorv2di3) (x, t1, t2));
15590 /* Perform a parallel unsigned saturating subtraction. */
15591 x = gen_reg_rtx (mode);
15592 emit_insn (gen_rtx_SET (VOIDmode, x,
15593 gen_rtx_US_MINUS (mode, cop0, cop1)));
15600 gcc_unreachable ();
15604 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the select arms (1+negate / 2-negate) instead of
   emitting an explicit NOT of the mask.  */
15608 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15609 operands[1+negate], operands[2-negate]);
15611 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15612 operands[2-negate]);
15616 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15617 true if we should do zero extension, else sign extension. HIGH_P is
15618 true if we want the N/2 high elements, else the low elements. */
15621 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15623 enum machine_mode imode = GET_MODE (operands[1]);
15624 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn for the source element width and half.
   (Case labels of the switch on IMODE are elided in this excerpt.)  */
15631 unpack = gen_vec_interleave_highv16qi;
15633 unpack = gen_vec_interleave_lowv16qi;
15637 unpack = gen_vec_interleave_highv8hi;
15639 unpack = gen_vec_interleave_lowv8hi;
15643 unpack = gen_vec_interleave_highv4si;
15645 unpack = gen_vec_interleave_lowv4si;
15648 gcc_unreachable ();
15651 dest = gen_lowpart (imode, operands[0]);
/* For zero extension interleave with zero; for sign extension interleave
   with a computed sign mask (0 > x yields all-ones for negative lanes).  */
15654 se = force_reg (imode, CONST0_RTX (imode));
15656 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15657 operands[1], pc_rtx, pc_rtx);
15659 emit_insn (unpack (dest, operands[1], se));
15662 /* This function performs the same task as ix86_expand_sse_unpack,
15663 but with SSE4.1 instructions. */
15666 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15668 enum machine_mode imode = GET_MODE (operands[1]);
15669 rtx (*unpack)(rtx, rtx);
/* Select pmovzx/pmovsx variant by source element width.  (Case labels of
   the switch on IMODE are elided in this excerpt.)  */
15676 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15678 unpack = gen_sse4_1_extendv8qiv8hi2;
15682 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15684 unpack = gen_sse4_1_extendv4hiv4si2;
15688 unpack = gen_sse4_1_zero_extendv2siv2di2;
15690 unpack = gen_sse4_1_extendv2siv2di2;
15693 gcc_unreachable ();
15696 dest = operands[0];
/* The extend insns read the low half, so for HIGH_P first shift the
   upper 8 bytes down into the low half of a scratch.  */
15699 /* Shift higher 8 bytes to lower 8 bytes. */
15700 src = gen_reg_rtx (imode);
15701 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15702 gen_lowpart (TImode, operands[1]),
15708 emit_insn (unpack (dest, src));
15711 /* This function performs the same task as ix86_expand_sse_unpack,
15712 but with sse5 instructions. */
15715 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15717 enum machine_mode imode = GET_MODE (operands[1]);
15718 int pperm_bytes[16];
/* H is the byte offset of the selected half of the source vector.  */
15720 int h = (high_p) ? 8 : 0;
15723 rtvec v = rtvec_alloc (16);
15726 rtx op0 = operands[0], op1 = operands[1];
/* V8HI result from V16QI source: each output word = source byte plus
   either a zero byte (unsigned) or a PPERM_SIGN replicated sign byte.
   (The switch's case labels are elided in this excerpt.)  */
15731 vs = rtvec_alloc (8);
15732 h2 = (high_p) ? 8 : 0;
15733 for (i = 0; i < 8; i++)
15735 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15736 pperm_bytes[2*i+1] = ((unsigned_p)
15738 : PPERM_SIGN | PPERM_SRC2 | i | h);
15741 for (i = 0; i < 16; i++)
15742 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15744 for (i = 0; i < 8; i++)
15745 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15747 p = gen_rtx_PARALLEL (VOIDmode, vs);
15748 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15750 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15752 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V4SI result from V8HI source: two source bytes then two extension
   bytes per output dword.  */
15756 vs = rtvec_alloc (4);
15757 h2 = (high_p) ? 4 : 0;
15758 for (i = 0; i < 4; i++)
15760 sign_extend = ((unsigned_p)
15762 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15763 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15764 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15765 pperm_bytes[4*i+2] = sign_extend;
15766 pperm_bytes[4*i+3] = sign_extend;
15769 for (i = 0; i < 16; i++)
15770 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15772 for (i = 0; i < 4; i++)
15773 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15775 p = gen_rtx_PARALLEL (VOIDmode, vs);
15776 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15778 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15780 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V2DI result from V4SI source: four source bytes then four extension
   bytes per output qword.  */
15784 vs = rtvec_alloc (2);
15785 h2 = (high_p) ? 2 : 0;
15786 for (i = 0; i < 2; i++)
15788 sign_extend = ((unsigned_p)
15790 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15791 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15792 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15793 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15794 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15795 pperm_bytes[8*i+4] = sign_extend;
15796 pperm_bytes[8*i+5] = sign_extend;
15797 pperm_bytes[8*i+6] = sign_extend;
15798 pperm_bytes[8*i+7] = sign_extend;
15801 for (i = 0; i < 16; i++)
15802 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15804 for (i = 0; i < 2; i++)
15805 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15807 p = gen_rtx_PARALLEL (VOIDmode, vs);
15808 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15810 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15812 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15816 gcc_unreachable ();
15822 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15823 next narrower integer vector type */
15825 ix86_expand_sse5_pack (rtx operands[3])
15827 enum machine_mode imode = GET_MODE (operands[0]);
15828 int pperm_bytes[16];
15830 rtvec v = rtvec_alloc (16);
15832 rtx op0 = operands[0];
15833 rtx op1 = operands[1];
15834 rtx op2 = operands[2];
/* Build a 16-byte pperm selector: low 8 result bytes come from op1's
   even bytes, high 8 from op2's even bytes (truncating pack).
   (Case labels of the switch on IMODE are elided in this excerpt.)  */
15839 for (i = 0; i < 8; i++)
15841 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15842 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15845 for (i = 0; i < 16; i++)
15846 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15848 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15849 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* HImode result elements: take the low two bytes of each dword.  */
15853 for (i = 0; i < 4; i++)
15855 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15856 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15857 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15858 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15861 for (i = 0; i < 16; i++)
15862 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15864 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15865 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* SImode result elements: take the low four bytes of each qword.  */
15869 for (i = 0; i < 2; i++)
15871 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15872 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15873 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15874 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15875 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15876 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15877 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15878 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15881 for (i = 0; i < 16; i++)
15882 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15884 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15885 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15889 gcc_unreachable ();
15895 /* Expand conditional increment or decrement using adb/sbb instructions.
15896 The default case using setcc followed by the conditional move can be
15897 done by generic code. */
15899 ix86_expand_int_addcc (rtx operands[])
15901 enum rtx_code code = GET_CODE (operands[1]);
15903 rtx val = const0_rtx;
15904 bool fpcmp = false;
15905 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1 / -1 adjustments can be expressed via the carry flag.  */
15907 if (operands[3] != const1_rtx
15908 && operands[3] != constm1_rtx)
/* Fail if the comparison cannot be turned into a carry-flag test.  */
15910 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15911 ix86_compare_op1, &compare_op))
15913 code = GET_CODE (compare_op);
15915 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15916 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15919 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must use the unordered-aware reversal.  */
15926 PUT_CODE (compare_op,
15927 reverse_condition_maybe_unordered
15928 (GET_CODE (compare_op)));
15930 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15932 PUT_MODE (compare_op, mode);
15934 /* Construct either adc or sbb insn. */
15935 if ((code == LTU) == (operands[3] == constm1_rtx))
15937 switch (GET_MODE (operands[0]))
15940 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15943 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15946 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15949 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15952 gcc_unreachable ();
15957 switch (GET_MODE (operands[0]))
15960 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15963 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15966 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op))q;
15969 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15972 gcc_unreachable ();
15975 return 1; /* DONE */
15979 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15980 works for floating pointer parameters and nonoffsetable memories.
15981 For pushes, it returns just stack offsets; the values will be saved
15982 in the right order. Maximally three parts are generated. */
15985 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of word-sized parts: 32-bit words on !TARGET_64BIT, 64-bit
   words otherwise (XFmode is special-cased to 3 parts).  */
15990 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15992 size = (GET_MODE_SIZE (mode) + 4) / 8;
15994 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15995 gcc_assert (size >= 2 && size <= 4);
15997 /* Optimize constant pool reference to immediates. This is used by fp
15998 moves, that force all constants to memory to allow combining. */
15999 if (MEM_P (operand) && MEM_READONLY_P (operand))
16001 rtx tmp = maybe_get_pool_constant (operand);
16006 if (MEM_P (operand) && !offsettable_memref_p (operand))
16008 /* The only non-offsetable memories we handle are pushes. */
16009 int ok = push_operand (operand, VOIDmode);
/* For a push, all parts are the same (pre-dec) address; the caller
   emits them in the right order.  */
16013 operand = copy_rtx (operand);
16014 PUT_MODE (operand, Pmode);
16015 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16019 if (GET_CODE (operand) == CONST_VECTOR)
16021 enum machine_mode imode = int_mode_for_mode (mode);
16022 /* Caution: if we looked through a constant pool memory above,
16023 the operand may actually have a different mode now. That's
16024 ok, since we want to pun this all the way back to an integer. */
16025 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16026 gcc_assert (operand != NULL);
/* 32-bit host word splitting (NOTE(review): the enclosing
   TARGET_64BIT test is elided in this excerpt).  */
16032 if (mode == DImode)
16033 split_di (&operand, 1, &parts[0], &parts[1]);
16038 if (REG_P (operand))
16040 gcc_assert (reload_completed);
16041 for (i = 0; i < size; i++)
16042 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16044 else if (offsettable_memref_p (operand))
16046 operand = adjust_address (operand, SImode, 0);
16047 parts[0] = operand;
16048 for (i = 1; i < size; i++)
16049 parts[i] = adjust_address (operand, SImode, 4 * i);
16051 else if (GET_CODE (operand) == CONST_DOUBLE)
16056 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant to its target bit pattern, widest first.  */
16060 real_to_target (l, &r, mode);
16061 parts[3] = gen_int_mode (l[3], SImode);
16062 parts[2] = gen_int_mode (l[2], SImode);
16065 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16066 parts[2] = gen_int_mode (l[2], SImode);
16069 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16072 gcc_unreachable ();
16074 parts[1] = gen_int_mode (l[1], SImode);
16075 parts[0] = gen_int_mode (l[0], SImode);
16078 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode upper for XFmode).  */
16083 if (mode == TImode)
16084 split_ti (&operand, 1, &parts[0], &parts[1]);
16085 if (mode == XFmode || mode == TFmode)
16087 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16088 if (REG_P (operand))
16090 gcc_assert (reload_completed);
16091 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16092 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16094 else if (offsettable_memref_p (operand))
16096 operand = adjust_address (operand, DImode, 0);
16097 parts[0] = operand;
16098 parts[1] = adjust_address (operand, upper_mode, 8);
16100 else if (GET_CODE (operand) == CONST_DOUBLE)
16105 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16106 real_to_target (l, &r, mode);
16108 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16109 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Combine two 32-bit target words into one HOST_WIDE_INT; the
   (2 << 31) - 1 mask and double-shift avoid a shift-by-32.  */
16112 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16113 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16116 parts[0] = immed_double_const (l[0], l[1], DImode);
16118 if (upper_mode == SImode)
16119 parts[1] = gen_int_mode (l[2], SImode);
16120 else if (HOST_BITS_PER_WIDE_INT >= 64)
16123 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16124 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16127 parts[1] = immed_double_const (l[2], l[3], DImode);
16130 gcc_unreachable ();
16137 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16138 Return false when normal moves are needed; true when all required
16139 insns have been emitted. Operands 2-4 contain the input values
16140 int the correct order; operands 5-7 contain the output values. */
16143 ix86_split_long_move (rtx operands[])
16148 int collisions = 0;
16149 enum machine_mode mode = GET_MODE (operands[0]);
16150 bool collisionparts[4];
16152 /* The DFmode expanders may ask us to move double.
16153 For 64bit target this is single move. By hiding the fact
16154 here we simplify i386.md splitters. */
16155 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16157 /* Optimize constant pool reference to immediates. This is used by
16158 fp moves, that force all constants to memory to allow combining. */
16160 if (MEM_P (operands[1])
16161 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16162 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16163 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16164 if (push_operand (operands[0], VOIDmode))
16166 operands[0] = copy_rtx (operands[0]);
16167 PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target is just one DImode move.  */
16170 operands[0] = gen_lowpart (DImode, operands[0]);
16171 operands[1] = gen_lowpart (DImode, operands[1]);
16172 emit_move_insn (operands[0], operands[1]);
16176 /* The only non-offsettable memory we handle is push. */
16177 if (push_operand (operands[0], VOIDmode))
16180 gcc_assert (!MEM_P (operands[0])
16181 || offsettable_memref_p (operands[0]));
16183 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16184 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16186 /* When emitting push, take care for source operands on the stack. */
16187 if (push && MEM_P (operands[1])
16188 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16189 for (i = 0; i < nparts - 1; i++)
16190 part[1][i] = change_address (part[1][i],
16191 GET_MODE (part[1][i]),
16192 XEXP (part[1][i + 1], 0));
16194 /* We need to do copy in the right order in case an address register
16195 of the source overlaps the destination. */
16196 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts collide with the source address.  */
16200 for (i = 0; i < nparts; i++)
16203 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16204 if (collisionparts[i])
16208 /* Collision in the middle part can be handled by reordering. */
16209 if (collisions == 1 && nparts == 3 && collisionparts [1])
16211 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16212 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16214 else if (collisions == 1
16216 && (collisionparts [1] || collisionparts [2]))
16218 if (collisionparts [1])
16220 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16221 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16225 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16226 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16230 /* If there are more collisions, we can't handle it by reordering.
16231 Do an lea to the last part and use only one colliding move. */
16232 else if (collisions > 1)
16238 base = part[0][nparts - 1];
16240 /* Handle the case when the last part isn't valid for lea.
16241 Happens in 64-bit mode storing the 12-byte XFmode. */
16242 if (GET_MODE (base) != Pmode)
16243 base = gen_rtx_REG (Pmode, REGNO (base));
16245 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16246 part[1][0] = replace_equiv_address (part[1][0], base);
16247 for (i = 1; i < nparts; i++)
16249 tmp = plus_constant (base, UNITS_PER_WORD * i);
16250 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path (NOTE(review): the enclosing push test and nparts == 3
   branch header are elided in this excerpt).  */
16261 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16262 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16263 emit_move_insn (part[0][2], part[1][2]);
16265 else if (nparts == 4)
16267 emit_move_insn (part[0][3], part[1][3]);
16268 emit_move_insn (part[0][2], part[1][2]);
16273 /* In 64bit mode we don't have 32bit push available. In case this is
16274 register, it is OK - we will just use larger counterpart. We also
16275 retype memory - these comes from attempt to avoid REX prefix on
16276 moving of second half of TFmode value. */
16277 if (GET_MODE (part[1][1]) == SImode)
16279 switch (GET_CODE (part[1][1]))
16282 part[1][1] = adjust_address (part[1][1], DImode, 0);
16286 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16290 gcc_unreachable ();
16293 if (GET_MODE (part[1][0]) == SImode)
16294 part[1][0] = part[1][1];
16297 emit_move_insn (part[0][1], part[1][1]);
16298 emit_move_insn (part[0][0], part[1][0]);
16302 /* Choose correct order to not overwrite the source before it is copied. */
16303 if ((REG_P (part[0][0])
16304 && REG_P (part[1][1])
16305 && (REGNO (part[0][0]) == REGNO (part[1][1])
16307 && REGNO (part[0][0]) == REGNO (part[1][2]))
16309 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16311 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Emit the parts high-to-low when the low destination would clobber a
   later source part.  */
16313 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16315 operands[2 + i] = part[0][j];
16316 operands[6 + i] = part[1][j];
16321 for (i = 0; i < nparts; i++)
16323 operands[2 + i] = part[0][i];
16324 operands[6 + i] = part[1][i];
16328 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16329 if (optimize_insn_for_size_p ())
16331 for (j = 0; j < nparts - 1; j++)
16332 if (CONST_INT_P (operands[6 + j])
16333 && operands[6 + j] != const0_rtx
16334 && REG_P (operands[2 + j]))
16335 for (i = j; i < nparts - 1; i++)
16336 if (CONST_INT_P (operands[7 + i])
16337 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16338 operands[7 + i] = operands[2 + j];
16341 for (i = 0; i < nparts; i++)
16342 emit_move_insn (operands[2 + i], operands[6 + i]);
16347 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16348 left shift by a constant, either using a single shift or
16349 a sequence of add instructions. */
16352 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* NOTE(review): interior lines are elided; MODE here is the *double-word*
   mode (DImode on 32-bit), so the half-word generators are chosen by
   'mode == DImode ? gen_addsi3 : gen_adddi3' — the SImode case of each
   pair is elided from this excerpt.  */
16356 emit_insn ((mode == DImode
16358 : gen_adddi3) (operand, operand, operand));
/* Prefer a short run of self-adds (x += x doubles) when cheaper than a
   constant shift and not optimizing for size.  */
16360 else if (!optimize_insn_for_size_p ()
16361 && count * ix86_cost->add <= ix86_cost->shift_const)
16364 for (i=0; i<count; i++)
16366 emit_insn ((mode == DImode
16368 : gen_adddi3) (operand, operand, operand));
16372 emit_insn ((mode == DImode
16374 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-null, enables the
   cmove-based variable-count adjustment.  */
16378 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16380 rtx low[2], high[2];
16382 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: fully resolved at expand time.  */
16384 if (CONST_INT_P (operands[2]))
16386 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16387 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: result low word is zero, high word is the shifted
   original low word.  */
16389 if (count >= single_width)
16391 emit_move_insn (high[0], low[1]);
16392 emit_move_insn (low[0], const0_rtx);
16394 if (count > single_width)
16395 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld into the high word, then shift the low.  */
16399 if (!rtx_equal_p (operands[0], operands[1]))
16400 emit_move_insn (operands[0], operands[1]);
16401 emit_insn ((mode == DImode
16403 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16404 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
16409 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16411 if (operands[1] == const1_rtx)
16413 /* Assuming we've chosen a QImode capable registers, then 1 << N
16414 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16415 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16417 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Set low = (N < width), high = (N >= width) via setcc on the tested
   width bit of the count, then shift both words by N (mod width).  */
16419 ix86_expand_clear (low[0]);
16420 ix86_expand_clear (high[0]);
16421 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16423 d = gen_lowpart (QImode, low[0]);
16424 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16425 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16426 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16428 d = gen_lowpart (QImode, high[0]);
16429 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16430 s = gen_rtx_NE (QImode, flags, const0_rtx);
16431 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16434 /* Otherwise, we can get the same results by manually performing
16435 a bit extract operation on bit 5/6, and then performing the two
16436 shifts. The two methods of getting 0/1 into low/high are exactly
16437 the same size. Avoiding the shift in the bit extract case helps
16438 pentium4 a bit; no one else seems to care much either way. */
16443 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16444 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16446 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16447 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (32-bit) / bit 6 (64-bit) of the count into high[0].  */
16449 emit_insn ((mode == DImode
16451 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16452 emit_insn ((mode == DImode
16454 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16455 emit_move_insn (low[0], high[0]);
16456 emit_insn ((mode == DImode
16458 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16461 emit_insn ((mode == DImode
16463 : gen_ashldi3) (low[0], low[0], operands[2]));
16464 emit_insn ((mode == DImode
16466 : gen_ashldi3) (high[0], high[0], operands[2]));
16470 if (operands[1] == constm1_rtx)
16472 /* For -1 << N, we can avoid the shld instruction, because we
16473 know that we're shifting 0...31/63 ones into a -1. */
16474 emit_move_insn (low[0], constm1_rtx)q;
16475 if (optimize_insn_for_size_p ())
16476 emit_move_insn (high[0], low[0]);
16478 emit_move_insn (high[0], constm1_rtx);
/* General case: shld + shl, then fix up when count >= word width.  */
16482 if (!rtx_equal_p (operands[0], operands[1]))
16483 emit_move_insn (operands[0], operands[1]);
16485 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16486 emit_insn ((mode == DImode
16488 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16491 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* With cmove and a scratch, adjust branch-free; else use the variant
   that emits a conditional jump.  */
16493 if (TARGET_CMOVE && scratch)
16495 ix86_expand_clear (scratch);
16496 emit_insn ((mode == DImode
16497 ? gen_x86_shift_adj_1
16498 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16502 emit_insn ((mode == DImode
16503 ? gen_x86_shift_adj_2
16504 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations; mirror of ix86_split_ashl, sign-filling from the top.  */
16508 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16510 rtx low[2], high[2];
16512 const int single_width = mode == DImode ? 32 : 64;
16514 if (CONST_INT_P (operands[2]))
16516 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16517 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both words become the sign word (high >> (w-1)).  */
16519 if (count == single_width * 2 - 1)
16521 emit_move_insn (high[0], high[1]);
16522 emit_insn ((mode == DImode
16524 : gen_ashrdi3) (high[0], high[0],
16525 GEN_INT (single_width - 1)));
16526 emit_move_insn (low[0], high[0]);
/* Count >= word size: low = shifted old high, high = sign fill.  */
16529 else if (count >= single_width)
16531 emit_move_insn (low[0], high[1]);
16532 emit_move_insn (high[0], low[0]);
16533 emit_insn ((mode == DImode
16535 : gen_ashrdi3) (high[0], high[0],
16536 GEN_INT (single_width - 1)));
16537 if (count > single_width)
16538 emit_insn ((mode == DImode
16540 : gen_ashrdi3) (low[0], low[0],
16541 GEN_INT (count - single_width)));
/* Count < word size: shrd into low, arithmetic shift of high.  */
16545 if (!rtx_equal_p (operands[0], operands[1]))
16546 emit_move_insn (operands[0], operands[1]);
16547 emit_insn ((mode == DImode
16549 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16550 emit_insn ((mode == DImode
16552 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up when count >= word width.  */
16557 if (!rtx_equal_p (operands[0], operands[1]))
16558 emit_move_insn (operands[0], operands[1]);
16560 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16562 emit_insn ((mode == DImode
16564 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16565 emit_insn ((mode == DImode
16567 : gen_ashrdi3) (high[0], high[0], operands[2]));
16569 if (TARGET_CMOVE && scratch)
/* Precompute the sign-fill word in SCRATCH for the branch-free fixup.  */
16571 emit_move_insn (scratch, high[0]);
16572 emit_insn ((mode == DImode
16574 : gen_ashrdi3) (scratch, scratch,
16575 GEN_INT (single_width - 1)));
16576 emit_insn ((mode == DImode
16577 ? gen_x86_shift_adj_1
16578 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16582 emit_insn ((mode == DImode
16583 ? gen_x86_shift_adj_3
16584 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-sized operations,
   analogous to ix86_split_ashr but zero-filling instead of sign-filling.
   NOTE(review): this extract elides interior lines (embedded original line
   numbers skip) -- code kept byte-identical, comments added only.  */
16589 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16591 rtx low[2], high[2];
16593 const int single_width = mode == DImode ? 32 : 64;
/* Constant count: straight-line sequence.  */
16595 if (CONST_INT_P (operands[2]))
16597 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16598 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low half gets the old high half, high half is
   cleared (logical shift brings in zeros).  */
16600 if (count >= single_width)
16602 emit_move_insn (low[0], high[1]);
16603 ix86_expand_clear (high[0]);
16605 if (count > single_width)
16606 emit_insn ((mode == DImode
16608 : gen_lshrdi3) (low[0], low[0],
16609 GEN_INT (count - single_width)));
/* Small constant count: shrd into the low half, plain shift of high.  */
16613 if (!rtx_equal_p (operands[0], operands[1]))
16614 emit_move_insn (operands[0], operands[1]);
16615 emit_insn ((mode == DImode
16617 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16618 emit_insn ((mode == DImode
16620 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then adjust for counts past the boundary.  */
16625 if (!rtx_equal_p (operands[0], operands[1]))
16626 emit_move_insn (operands[0], operands[1]);
16628 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16630 emit_insn ((mode == DImode
16632 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16633 emit_insn ((mode == DImode
16635 : gen_lshrdi3) (high[0], high[0], operands[2]));
16637 /* Heh. By reversing the arguments, we can reuse this pattern. */
16638 if (TARGET_CMOVE && scratch)
16640 ix86_expand_clear (scratch);
16641 emit_insn ((mode == DImode
16642 ? gen_x86_shift_adj_1
16643 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16647 emit_insn ((mode == DImode
16648 ? gen_x86_shift_adj_2
16649 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16653 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is on the REG_BR_PROB_BASE scale; the note is attached to the last
   emitted insn, which must be a jump (asserted below).
   NOTE(review): the tail of this body (attaching the note) is elided in
   this extract -- kept byte-identical.  */
16655 predict_jump (int prob)
16657 rtx insn = get_last_insn ();
16658 gcc_assert (JUMP_P (insn));
16660 = gen_rtx_EXPR_LIST (REG_BR_PROB,
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label that is
   returned to the caller.  EPILOGUE selects the branch-probability hint:
   epilogue tests are ~50/50, prologue alignment tests are predicted
   ~90% not-taken.  (Comment reworded from the original "Dest VARIABLE",
   an apparent typo for "Test".)
   NOTE(review): interior lines (the jump emission / return) are elided in
   this extract -- kept byte-identical.  */
16665 /* Helper function for the string operations below. Dest VARIABLE whether
16666 it is aligned to VALUE bytes. If true, jump to the label. */
16668 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16670 rtx label = gen_label_rtx ();
16671 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask off the low bits in a mode-appropriate AND.  */
16672 if (GET_MODE (variable) == DImode)
16673 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16675 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16676 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16679 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16681 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16685 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place (emits add of -VALUE), picking
   the add pattern matching the register's mode.  */
16687 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16689 if (GET_MODE (countreg) == DImode)
16690 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16692 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16695 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: constants are forced into a
   register, Pmode values are copied, and anything else is zero-extended
   SImode->DImode (only reachable when Pmode is DImode).
   NOTE(review): the declaration of R and the final return are elided in
   this extract -- kept byte-identical.  */
16697 ix86_zero_extend_to_Pmode (rtx exp)
16700 if (GET_MODE (exp) == VOIDmode)
16701 return force_reg (Pmode, exp);
16702 if (GET_MODE (exp) == Pmode)
16703 return copy_to_mode_reg (Pmode, exp);
16704 r = gen_reg_rtx (Pmode);
16705 emit_insn (gen_zero_extendsidi2 (r, exp));
16709 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (the division is done as a logical right
   shift by exact_log2 (scale)).  Constant counts are folded at compile
   time; otherwise COUNTREG must already be a register.  */
16711 scale_counter (rtx countreg, int scale)
16714 rtx piece_size_mask;
16718 if (CONST_INT_P (countreg))
16719 return GEN_INT (INTVAL (countreg) / scale);
16720 gcc_assert (REG_P (countreg));
16722 piece_size_mask = GEN_INT (scale - 1);
16723 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16724 GEN_INT (exact_log2 (scale)),
16725 NULL, 1, OPTAB_DIRECT);
16729 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16730 DImode for constant loop counts. */
/* Non-constant counters keep their own mode; a constant that does not fit
   in 32 bits forces DImode on 64-bit targets.  NOTE(review): the return
   statements for the constant cases are elided in this extract.  */
16732 static enum machine_mode
16733 counter_mode (rtx count_exp)
16735 if (GET_MODE (count_exp) != VOIDmode)
16736 return GET_MODE (count_exp);
16737 if (GET_CODE (count_exp) != CONST_INT)
16739 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16744 /* When SRCPTR is non-NULL, output simple loop to move memory
16745 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16746 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16747 equivalent loop to set memory by VALUE (supposed to be in MODE).
16749 The size is rounded down to whole number of chunk size moved at once.
16750 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): this extract elides interior lines (embedded original line
   numbers skip) -- code kept byte-identical, comments added only.  */
16754 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16755 rtx destptr, rtx srcptr, rtx value,
16756 rtx count, enum machine_mode mode, int unroll,
16759 rtx out_label, top_label, iter, tmp;
16760 enum machine_mode iter_mode = counter_mode (count);
/* Bytes moved per loop iteration, and the mask rounding COUNT down to a
   whole number of such chunks.  */
16761 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16762 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16768 top_label = gen_label_rtx ();
16769 out_label = gen_label_rtx ();
16770 iter = gen_reg_rtx (iter_mode);
16772 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16773 NULL, 1, OPTAB_DIRECT);
16774 /* Those two should combine. */
/* Zero-size guard: skip the loop entirely when the rounded size is 0.  */
16775 if (piece_size == const1_rtx)
16777 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16779 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16781 emit_move_insn (iter, const0_rtx);
16783 emit_label (top_label);
/* Address the current chunk as base pointer + loop counter.  */
16785 tmp = convert_modes (Pmode, iter_mode, iter, true);
16786 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16787 destmem = change_address (destmem, mode, x_addr);
16791 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16792 srcmem = change_address (srcmem, mode, y_addr);
16794 /* When unrolling for chips that reorder memory reads and writes,
16795 we can save registers by using single temporary.
16796 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): "&& 0" deliberately disables this interleaved variant;
   the load-all-then-store-all variant below is always used.  */
16797 if (!TARGET_64BIT && 0)
16799 for (i = 0; i < unroll; i++)
16804 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16806 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16808 emit_move_insn (destmem, srcmem);
/* Copy variant: load up to 4 chunks into temporaries, then store them.  */
16814 gcc_assert (unroll <= 4);
16815 for (i = 0; i < unroll; i++)
16817 tmpreg[i] = gen_reg_rtx (mode);
16821 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16823 emit_move_insn (tmpreg[i], srcmem);
16825 for (i = 0; i < unroll; i++)
16830 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16832 emit_move_insn (destmem, tmpreg[i]);
/* Set variant (SRCPTR is NULL): store VALUE into each chunk.  */
16837 for (i = 0; i < unroll; i++)
16841 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16842 emit_move_insn (destmem, value);
/* Advance the counter and loop while iter < size.  */
16845 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16846 true, OPTAB_LIB_WIDEN);
16848 emit_move_insn (iter, tmp);
16850 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a branch-probability hint from the expected iteration count.  */
16852 if (expected_size != -1)
16854 expected_size /= GET_MODE_SIZE (mode) * unroll;
16855 if (expected_size == 0)
16857 else if (expected_size > REG_BR_PROB_BASE)
16858 predict_jump (REG_BR_PROB_BASE - 1);
16860 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16863 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Bump the caller's pointers past the copied/set region.  */
16864 iter = ix86_zero_extend_to_Pmode (iter);
16865 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16866 true, OPTAB_LIB_WIDEN);
16867 if (tmp != destptr)
16868 emit_move_insn (destptr, tmp);
16871 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16872 true, OPTAB_LIB_WIDEN);
16874 emit_move_insn (srcptr, tmp);
16876 emit_label (out_label);
16879 /* Output "rep; mov" instruction.
16880 Arguments have same meaning as for previous function */
/* Emits a single rep-prefixed string move of COUNT bytes in chunks of
   MODE.  DESTEXP/SRCEXP describe the final pointer values for the
   rep_mov pattern.  NOTE(review): interior lines are elided in this
   extract -- kept byte-identical.  */
16882 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16883 rtx destptr, rtx srcptr,
16885 enum machine_mode mode)
16891 /* If the size is known, it is shorter to use rep movs. */
16892 if (mode == QImode && CONST_INT_P (count)
16893 && !(INTVAL (count) & 3))
/* Rewrap both MEMs as BLKmode at their base pointers so the rep pattern
   sees the whole block.  */
16896 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16897 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16898 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16899 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Counter holds the chunk count, not the byte count.  */
16900 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16901 if (mode != QImode)
/* Final pointers = base + (chunks << log2 (chunk size)).  */
16903 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16904 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16905 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16906 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16907 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16908 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16912 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16913 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* For a known count, record the exact (rounded-down) size on both MEMs
   for alias analysis; otherwise clear any stale size info.  */
16915 if (CONST_INT_P (count))
16917 count = GEN_INT (INTVAL (count)
16918 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16919 destmem = shallow_copy_rtx (destmem);
16920 srcmem = shallow_copy_rtx (srcmem);
16921 set_mem_size (destmem, count);
16922 set_mem_size (srcmem, count);
16926 if (MEM_SIZE (destmem))
16927 set_mem_size (destmem, NULL_RTX);
16928 if (MEM_SIZE (srcmem))
16929 set_mem_size (srcmem, NULL_RTX);
16931 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16935 /* Output "rep; stos" instruction.
16936 Arguments have same meaning as for previous function */
/* Memset counterpart of expand_movmem_via_rep_mov: stores VALUE (in MODE)
   COUNT bytes' worth of times via a rep-prefixed stos.
   NOTE(review): interior lines are elided in this extract.  */
16938 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16939 rtx count, enum machine_mode mode,
16945 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16946 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* stos wants the store value in a register of the chunk mode.  */
16947 value = force_reg (mode, gen_lowpart (mode, value));
16948 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16949 if (mode != QImode)
16951 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16952 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16953 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16956 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a constant zero fill with known count lets us keep precise MEM
   size info; otherwise drop it.  */
16957 if (orig_value == const0_rtx && CONST_INT_P (count))
16959 count = GEN_INT (INTVAL (count)
16960 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16961 destmem = shallow_copy_rtx (destmem);
16962 set_mem_size (destmem, count)
16964 else if (MEM_SIZE (destmem))
16965 set_mem_size (destmem, NULL_RTX);
16966 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one MODE-sized element move from SRCMEM+OFFSET to DESTMEM+OFFSET
   via the strmov pattern, auto-incrementing both pointer registers.  */
16970 emit_strmov (rtx destmem, rtx srcmem,
16971 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16973 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16974 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16975 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16978 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Tail handling for the main copy loop.  Constant counts get a fixed
   sequence of progressively smaller moves (16/8/4/2/1); variable counts
   get either strmov-based align tests (TARGET_SINGLE_STRINGOP) or plain
   moves addressed via a running offset register.
   NOTE(review): this extract elides interior lines (embedded original line
   numbers skip) -- code kept byte-identical, comments added only.  */
16980 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16981 rtx destptr, rtx srcptr, rtx count, int max_size)
16984 if (CONST_INT_P (count))
16986 HOST_WIDE_INT countval = INTVAL (count);
/* Peel off the bits of the residual count from high to low.  */
16989 if ((countval & 0x10) && max_size > 16)
16993 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16994 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16997 gcc_unreachable ();
17000 if ((countval & 0x08) && max_size > 8)
17003 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit targets split an 8-byte residue into two SImode moves.  */
17006 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17007 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17011 if ((countval & 0x04) && max_size > 4)
17013 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17016 if ((countval & 0x02) && max_size > 2)
17018 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17021 if ((countval & 0x01) && max_size > 1)
17023 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Variable count, large residue: fall back to a byte loop.  */
17030 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17031 count, 1, OPTAB_DIRECT);
17032 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17033 count, QImode, 1, 4);
17037 /* When there are stringops, we can cheaply increase dest and src pointers.
17038 Otherwise we save code size by maintaining offset (zero is readily
17039 available from preceding rep operation) and using x86 addressing modes.
17041 if (TARGET_SINGLE_STRINGOP)
/* Variant 1: test each residual bit of COUNT and use auto-incrementing
   strmov insns, guarded by alignment-test branches.  */
17045 rtx label = ix86_expand_aligntest (count, 4, true);
17046 src = change_address (srcmem, SImode, srcptr);
17047 dest = change_address (destmem, SImode, destptr);
17048 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17049 emit_label (label);
17050 LABEL_NUSES (label) = 1;
17054 rtx label = ix86_expand_aligntest (count, 2, true);
17055 src = change_address (srcmem, HImode, srcptr);
17056 dest = change_address (destmem, HImode, destptr);
17057 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17058 emit_label (label);
17059 LABEL_NUSES (label) = 1;
17063 rtx label = ix86_expand_aligntest (count, 1, true);
17064 src = change_address (srcmem, QImode, srcptr);
17065 dest = change_address (destmem, QImode, destptr);
17066 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17067 emit_label (label);
17068 LABEL_NUSES (label) = 1;
/* Variant 2: keep the base pointers fixed and advance a separate offset
   register, using base+offset addressing for each move.  */
17073 rtx offset = force_reg (Pmode, const0_rtx);
17078 rtx label = ix86_expand_aligntest (count, 4, true);
17079 src = change_address (srcmem, SImode, srcptr);
17080 dest = change_address (destmem, SImode, destptr);
17081 emit_move_insn (dest, src);
17082 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17083 true, OPTAB_LIB_WIDEN);
17085 emit_move_insn (offset, tmp);
17086 emit_label (label);
17087 LABEL_NUSES (label) = 1;
17091 rtx label = ix86_expand_aligntest (count, 2, true);
17092 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17093 src = change_address (srcmem, HImode, tmp);
17094 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17095 dest = change_address (destmem, HImode, tmp);
17096 emit_move_insn (dest, src);
17097 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17098 true, OPTAB_LIB_WIDEN);
17100 emit_move_insn (offset, tmp);
17101 emit_label (label);
17102 LABEL_NUSES (label) = 1;
17106 rtx label = ix86_expand_aligntest (count, 1, true);
17107 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17108 src = change_address (srcmem, QImode, tmp);
17109 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17110 dest = change_address (destmem, QImode, tmp);
17111 emit_move_insn (dest, src);
17112 emit_label (label);
17113 LABEL_NUSES (label) = 1;
17118 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-loop fallback for the memset epilogue: masks COUNT down to the
   residue and delegates to the generic set/move loop in QImode.  */
17120 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17121 rtx count, int max_size)
17124 expand_simple_binop (counter_mode (count), AND, count,
17125 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17126 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17127 gen_lowpart (QImode, value), count, QImode,
17131 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Memset counterpart of expand_movmem_epilogue: constant counts get a
   fixed 16/8/4/2/1 strset ladder, variable counts get alignment-test
   guarded stores (or the byte loop for large residues).
   NOTE(review): this extract elides interior lines (embedded original line
   numbers skip) -- code kept byte-identical, comments added only.  */
17133 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17137 if (CONST_INT_P (count))
17139 HOST_WIDE_INT countval = INTVAL (count);
17142 if ((countval & 0x10) && max_size > 16)
17146 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17147 emit_insn (gen_strset (destptr, dest, value));
17148 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17149 emit_insn (gen_strset (destptr, dest, value));
17152 gcc_unreachable ();
17155 if ((countval & 0x08) && max_size > 8)
17159 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17160 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit targets split the 8-byte residue into two SImode stores.  */
17164 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17165 emit_insn (gen_strset (destptr, dest, value));
17166 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17167 emit_insn (gen_strset (destptr, dest, value));
17171 if ((countval & 0x04) && max_size > 4)
17173 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17174 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17177 if ((countval & 0x02) && max_size > 2)
17179 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17180 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17183 if ((countval & 0x01) && max_size > 1)
17185 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17186 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Variable count, large residue: byte loop.  */
17193 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: test each residual bit and store with strset.  */
17198 rtx label = ix86_expand_aligntest (count, 16, true);
17201 dest = change_address (destmem, DImode, destptr);
17202 emit_insn (gen_strset (destptr, dest, value));
17203 emit_insn (gen_strset (destptr, dest, value));
17207 dest = change_address (destmem, SImode, destptr);
17208 emit_insn (gen_strset (destptr, dest, value));
17209 emit_insn (gen_strset (destptr, dest, value));
17210 emit_insn (gen_strset (destptr, dest, value));
17211 emit_insn (gen_strset (destptr, dest, value));
17213 emit_label (label);
17214 LABEL_NUSES (label) = 1;
17218 rtx label = ix86_expand_aligntest (count, 8, true);
17221 dest = change_address (destmem, DImode, destptr);
17222 emit_insn (gen_strset (destptr, dest, value));
17226 dest = change_address (destmem, SImode, destptr);
17227 emit_insn (gen_strset (destptr, dest, value));
17228 emit_insn (gen_strset (destptr, dest, value));
17230 emit_label (label);
17231 LABEL_NUSES (label) = 1;
17235 rtx label = ix86_expand_aligntest (count, 4, true);
17236 dest = change_address (destmem, SImode, destptr);
17237 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17238 emit_label (label);
17239 LABEL_NUSES (label) = 1;
17243 rtx label = ix86_expand_aligntest (count, 2, true);
17244 dest = change_address (destmem, HImode, destptr);
17245 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17246 emit_label (label);
17247 LABEL_NUSES (label) = 1;
17251 rtx label = ix86_expand_aligntest (count, 1, true);
17252 dest = change_address (destmem, QImode, destptr);
17253 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17254 emit_label (label);
17255 LABEL_NUSES (label) = 1;
17259 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17260 DESIRED_ALIGNMENT. */
/* Emits up to three guarded single moves (1/2/4 bytes) so that DESTPTR
   becomes DESIRED_ALIGNMENT-aligned, decrementing COUNT accordingly.
   Each move is skipped when the pointer is already aligned at that level.
   (The header comment's "DEST to SRC" reads reversed -- the code copies
   from SRC to DEST.)  */
17262 expand_movmem_prologue (rtx destmem, rtx srcmem,
17263 rtx destptr, rtx srcptr, rtx count,
17264 int align, int desired_alignment)
17266 if (align <= 1 && desired_alignment > 1)
17268 rtx label = ix86_expand_aligntest (destptr, 1, false);
17269 srcmem = change_address (srcmem, QImode, srcptr);
17270 destmem = change_address (destmem, QImode, destptr);
17271 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17272 ix86_adjust_counter (count, 1);
17273 emit_label (label);
17274 LABEL_NUSES (label) = 1;
17276 if (align <= 2 && desired_alignment > 2)
17278 rtx label = ix86_expand_aligntest (destptr, 2, false);
17279 srcmem = change_address (srcmem, HImode, srcptr);
17280 destmem = change_address (destmem, HImode, destptr);
17281 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17282 ix86_adjust_counter (count, 2);
17283 emit_label (label);
17284 LABEL_NUSES (label) = 1;
17286 if (align <= 4 && desired_alignment > 4)
17288 rtx label = ix86_expand_aligntest (destptr, 4, false);
17289 srcmem = change_address (srcmem, SImode, srcptr);
17290 destmem = change_address (destmem, SImode, destptr);
17291 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17292 ix86_adjust_counter (count, 4);
17293 emit_label (label);
17294 LABEL_NUSES (label) = 1;
/* The 1/2/4 ladder above only reaches 8-byte alignment.  */
17296 gcc_assert (desired_alignment <= 8);
17299 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17300 ALIGN_BYTES is how many bytes need to be copied. */
/* Compile-time-known variant of expand_movmem_prologue: ALIGN_BYTES is a
   constant, so the 1/2/4 moves are emitted unconditionally and the MEM
   alignment/size attributes are updated precisely.  SRCP is updated to
   the adjusted source MEM.
   FIX(review): the second trailing set_mem_size call below updated DST
   with SRC's size -- it must update SRC (the symmetric counterpart of the
   dst/dst_size call), otherwise SRC keeps a stale size attribute and
   DST's size is clobbered with the wrong value.
   NOTE(review): this extract elides interior lines (embedded original line
   numbers skip); visible code otherwise kept byte-identical.  */
17302 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17303 int desired_align, int align_bytes)
17306 rtx src_size, dst_size;
/* How many bytes until SRC is also desired_align-aligned (or -1 if its
   alignment offset is unknown).  */
17308 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17309 if (src_align_bytes >= 0)
17310 src_align_bytes = desired_align - src_align_bytes;
17311 src_size = MEM_SIZE (src);
17312 dst_size = MEM_SIZE (dst);
17313 if (align_bytes & 1)
17315 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17316 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17318 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17320 if (align_bytes & 2)
17322 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17323 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17324 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17325 set_mem_align (dst, 2 * BITS_PER_UNIT);
/* SRC can be marked more aligned only if its misalignment tracks DST's.  */
17326 if (src_align_bytes >= 0
17327 && (src_align_bytes & 1) == (align_bytes & 1)
17328 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17329 set_mem_align (src, 2 * BITS_PER_UNIT);
17331 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17333 if (align_bytes & 4)
17335 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17336 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17337 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17338 set_mem_align (dst, 4 * BITS_PER_UNIT);
17339 if (src_align_bytes >= 0)
17341 unsigned int src_align = 0;
17342 if ((src_align_bytes & 3) == (align_bytes & 3))
17344 else if ((src_align_bytes & 1) == (align_bytes & 1))
17346 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17347 set_mem_align (src, src_align * BITS_PER_UNIT);
17350 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Rewrap both MEMs as BLKmode past the prologue bytes and record the
   alignment now guaranteed for the main copy loop.  */
17352 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17353 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17354 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17355 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17356 if (src_align_bytes >= 0)
17358 unsigned int src_align = 0;
17359 if ((src_align_bytes & 7) == (align_bytes & 7))
17361 else if ((src_align_bytes & 3) == (align_bytes & 3))
17363 else if ((src_align_bytes & 1) == (align_bytes & 1))
17365 if (src_align > (unsigned int) desired_align)
17366 src_align = desired_align;
17367 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17368 set_mem_align (src, src_align * BITS_PER_UNIT);
/* Shrink the recorded sizes by the bytes consumed above; the second call
   previously wrote DST with SRC's size (see FIX note in the header).  */
17371 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17373 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17378 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17379 DESIRED_ALIGNMENT. */
/* Memset counterpart of expand_movmem_prologue: up to three guarded
   1/2/4-byte strset stores align DESTPTR to DESIRED_ALIGNMENT,
   decrementing COUNT for each byte actually stored.  */
17381 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17382 int align, int desired_alignment)
17384 if (align <= 1 && desired_alignment > 1)
17386 rtx label = ix86_expand_aligntest (destptr, 1, false);
17387 destmem = change_address (destmem, QImode, destptr);
17388 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17389 ix86_adjust_counter (count, 1);
17390 emit_label (label);
17391 LABEL_NUSES (label) = 1;
17393 if (align <= 2 && desired_alignment > 2)
17395 rtx label = ix86_expand_aligntest (destptr, 2, false);
17396 destmem = change_address (destmem, HImode, destptr);
17397 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17398 ix86_adjust_counter (count, 2);
17399 emit_label (label);
17400 LABEL_NUSES (label) = 1;
17402 if (align <= 4 && desired_alignment > 4)
17404 rtx label = ix86_expand_aligntest (destptr, 4, false);
17405 destmem = change_address (destmem, SImode, destptr);
17406 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17407 ix86_adjust_counter (count, 4);
17408 emit_label (label);
17409 LABEL_NUSES (label) = 1;
/* The 1/2/4 ladder above only reaches 8-byte alignment.  */
17411 gcc_assert (desired_alignment <= 8);
17414 /* Set enough from DST to align DST known to by aligned by ALIGN to
17415 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Compile-time-known memset prologue: ALIGN_BYTES is constant, so the
   1/2/4-byte stores are unconditional and DST's alignment/size
   attributes are updated precisely.  Returns the adjusted DST (the
   return statement is elided in this extract).  */
17417 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17418 int desired_align, int align_bytes)
17421 rtx dst_size = MEM_SIZE (dst);
17422 if (align_bytes & 1)
17424 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17426 emit_insn (gen_strset (destreg, dst,
17427 gen_lowpart (QImode, value)));
17429 if (align_bytes & 2)
17431 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17432 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17433 set_mem_align (dst, 2 * BITS_PER_UNIT);
17435 emit_insn (gen_strset (destreg, dst,
17436 gen_lowpart (HImode, value)));
17438 if (align_bytes & 4)
17440 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17441 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17442 set_mem_align (dst, 4 * BITS_PER_UNIT);
17444 emit_insn (gen_strset (destreg, dst,
17445 gen_lowpart (SImode, value)));
/* Rewrap as BLKmode past the prologue bytes with the new alignment.  */
17447 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17448 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17449 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17451 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17455 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Chooses a stringop_alg for a memcpy/memset of COUNT bytes (0 when
   unknown) with profile-estimated EXPECTED_SIZE (-1 when unknown).
   MEMSET selects the memset cost table instead of memcpy.
   *DYNAMIC_CHECK is set to a size threshold when the caller should emit a
   runtime size check and call the library above it, else -1.
   FIX(review): the two loop bounds below read NAX_STRINGOP_ALGS, a typo
   for the MAX_STRINGOP_ALGS macro (i386.h) that sizes stringop_algs::size
   -- corrected here.
   NOTE(review): this extract elides interior lines (embedded original line
   numbers skip); visible code otherwise kept byte-identical.  */
17456 static enum stringop_alg
17457 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17458 int *dynamic_check)
17460 const struct stringop_algs * algs;
17461 bool optimize_for_speed;
17462 /* Algorithms using the rep prefix want at least edi and ecx;
17463 additionally, memset wants eax and memcpy wants esi. Don't
17464 consider such algorithms if the user has appropriated those
17465 registers for their own purposes. */
17466 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17468 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17470 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17471 || (alg != rep_prefix_1_byte \
17472 && alg != rep_prefix_4_byte \
17473 && alg != rep_prefix_8_byte))
17474 const struct processor_costs *cost;
17476 /* Even if the string operation call is cold, we still might spend a lot
17477 of time processing large blocks. */
17478 if (optimize_function_for_size_p (cfun)
17479 || (optimize_insn_for_size_p ()
17480 && expected_size != -1 && expected_size < 256))
17481 optimize_for_speed = false;
17483 optimize_for_speed = true;
17485 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17487 *dynamic_check = -1;
17489 algs = &cost->memset[TARGET_64BIT != 0];
17491 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins when its algorithm is usable.  */
17492 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17493 return stringop_alg;
17494 /* rep; movq or rep; movl is the smallest variant. */
17495 else if (!optimize_for_speed)
17497 if (!count || (count & 3))
17498 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17500 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17502 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17504 else if (expected_size != -1 && expected_size < 4)
17505 return loop_1_byte;
/* Known expected size: walk the cost table's size buckets.  */
17506 else if (expected_size != -1)
17509 enum stringop_alg alg = libcall;
17510 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17512 /* We get here if the algorithms that were not libcall-based
17513 were rep-prefix based and we are unable to use rep prefixes
17514 based on global register usage. Break out of the loop and
17515 use the heuristic below. */
17516 if (algs->size[i].max == 0)
17518 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17520 enum stringop_alg candidate = algs->size[i].alg;
17522 if (candidate != libcall && ALG_USABLE_P (candidate))
17524 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17525 last non-libcall inline algorithm. */
17526 if (TARGET_INLINE_ALL_STRINGOPS)
17528 /* When the current size is best to be copied by a libcall,
17529 but we are still forced to inline, run the heuristic below
17530 that will pick code for medium sized blocks. */
17531 if (alg != libcall)
17535 else if (ALG_USABLE_P (candidate))
17539 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17541 /* When asked to inline the call anyway, try to pick meaningful choice.
17542 We look for maximal size of block that is faster to copy by hand and
17543 take blocks of at most of that size guessing that average size will
17544 be roughly half of the block.
17546 If this turns out to be bad, we might simply specify the preferred
17547 choice in ix86_costs. */
17548 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17549 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17552 enum stringop_alg alg;
17554 bool any_alg_usable_p = true;
17556 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17558 enum stringop_alg candidate = algs->size[i].alg;
17559 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17561 if (candidate != libcall && candidate
17562 && ALG_USABLE_P (candidate))
17563 max = algs->size[i].max;
17565 /* If there aren't any usable algorithms, then recursing on
17566 smaller sizes isn't going to find anything. Just return the
17567 simple byte-at-a-time copy loop. */
17568 if (!any_alg_usable_p)
17570 /* Pick something reasonable. */
17571 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17572 *dynamic_check = 128;
17573 return loop_1_byte;
/* Recurse with half the largest inline-profitable size as the guess;
   the recursion must not itself request a dynamic check.  */
17577 alg = decide_alg (count, max / 2, memset, dynamic_check);
17578 gcc_assert (*dynamic_check == -1);
17579 gcc_assert (alg != libcall);
17580 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17581 *dynamic_check = max;
17584 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17585 #undef ALG_USABLE_P
17588 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17589 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the destination alignment worth establishing in the prologue
   for algorithm ALG; never less than the known ALIGN, and not worth
   raising at all for very small expected sizes.
   NOTE(review): several switch cases/labels are elided in this extract --
   code kept byte-identical.  */
17591 decide_alignment (int align,
17592 enum stringop_alg alg,
17595 int desired_align = 0;
17599 gcc_unreachable ();
17601 case unrolled_loop:
17602 desired_align = GET_MODE_SIZE (Pmode);
17604 case rep_prefix_8_byte:
17607 case rep_prefix_4_byte:
17608 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17609 copying whole cacheline at once. */
17610 if (TARGET_PENTIUMPRO)
17615 case rep_prefix_1_byte:
17616 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17617 copying whole cacheline at once. */
17618 if (TARGET_PENTIUMPRO)
/* Never go below the alignment we already have, and don't bother
   aligning tiny blocks.  */
17632 if (desired_align < align)
17633 desired_align = align;
17634 if (expected_size != -1 && expected_size < 4)
17635 desired_align = align;
17636 return desired_align;
17639 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the body of this function is entirely elided in this
   extract; only the declaration line survives.  */
17641 smallest_pow2_greater_than (int val)
17649 /* Expand string move (memcpy) operation. Use i386 string operations when
17650 profitable. expand_setmem contains similar code. The code depends upon
17651 architecture, block size and alignment, but always has the same
17654 1) Prologue guard: Conditional that jumps up to epilogues for small
17655 blocks that can be handled by epilogue alone. This is faster but
17656 also needed for correctness, since the prologue assumes the block is larger
17657 than the desired alignment.
17659 Optional dynamic check for size and libcall for large
17660 blocks is emitted here too, with -minline-stringops-dynamically.
17662 2) Prologue: copy first few bytes in order to get destination aligned
17663 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17664 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17665 We emit either a jump tree on power of two sized blocks, or a byte loop.
17667 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17668 with specified algorithm.
17670 4) Epilogue: code copying tail of the block that is too small to be
17671 handled by main body (or up to size guarded by prologue guard). */
/* DST/SRC are BLKmode MEMs; COUNT_EXP is the byte count, ALIGN_EXP the
   guaranteed alignment, EXPECTED_ALIGN_EXP/EXPECTED_SIZE_EXP come from
   profile feedback.  NOTE(review): the return statements and several
   interior braces/case labels are elided in this extract -- confirm the
   return convention against the full source.  */
17674 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17675 rtx expected_align_exp, rtx expected_size_exp)
17681 rtx jump_around_label = NULL;
17682 HOST_WIDE_INT align = 1;
17683 unsigned HOST_WIDE_INT count = 0;
17684 HOST_WIDE_INT expected_size = -1;
17685 int size_needed = 0, epilogue_size_needed;
17686 int desired_align = 0, align_bytes = 0;
17687 enum stringop_alg alg;
17689 bool need_zero_guard = false;
17691 if (CONST_INT_P (align_exp))
17692 align = INTVAL (align_exp);
17693 /* i386 can do misaligned access on reasonably increased cost. */
17694 if (CONST_INT_P (expected_align_exp)
17695 && INTVAL (expected_align_exp) > align)
17696 align = INTVAL (expected_align_exp);
17697 /* ALIGN is the minimum of destination and source alignment, but we care here
17698 just about destination alignment. */
17699 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17700 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17702 if (CONST_INT_P (count_exp))
17703 count = expected_size = INTVAL (count_exp);
17704 if (CONST_INT_P (expected_size_exp) && count == 0)
17705 expected_size = INTVAL (expected_size_exp);
17707 /* Make sure we don't need to care about overflow later on. */
17708 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17711 /* Step 0: Decide on preferred algorithm, desired alignment and
17712 size of chunks to be copied by main loop. */
17714 alg = decide_alg (count, expected_size, false, &dynamic_check);
17715 desired_align = decide_alignment (align, alg, expected_size);
17717 if (!TARGET_ALIGN_STRINGOPS)
17718 align = desired_align;
17720 if (alg == libcall)
17722 gcc_assert (alg != no_stringop);
17724 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17725 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17726 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size of the main loop; a zero guard is needed for
   loop algorithms, which would misbehave on a zero-byte count.  */
17731 gcc_unreachable ();
17733 need_zero_guard = true;
17734 size_needed = GET_MODE_SIZE (Pmode);
17736 case unrolled_loop:
17737 need_zero_guard = true;
17738 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17740 case rep_prefix_8_byte:
17743 case rep_prefix_4_byte:
17746 case rep_prefix_1_byte:
17750 need_zero_guard = true;
17755 epilogue_size_needed = size_needed;
17757 /* Step 1: Prologue guard. */
17759 /* Alignment code needs count to be in register. */
17760 if (CONST_INT_P (count_exp) && desired_align > align)
17762 if (INTVAL (count_exp) > desired_align
17763 && INTVAL (count_exp) > size_needed)
17766 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17767 if (align_bytes <= 0)
17770 align_bytes = desired_align - align_bytes;
17772 if (align_bytes == 0)
17773 count_exp = force_reg (counter_mode (count_exp), count_exp);
17775 gcc_assert (desired_align >= 1 && align >= 1);
17777 /* Ensure that alignment prologue won't copy past end of block. */
17778 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17780 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17781 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17782 Make sure it is power of 2. */
17783 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17787 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
17789 /* If main algorithm works on QImode, no epilogue is needed.
17790 For small sizes just don't align anything. */
17791 if (size_needed == 1)
17792 desired_align = align;
17799 label = gen_label_rtx ();
17800 emit_cmp_and_jump_insns (count_exp,
17801 GEN_INT (epilogue_size_needed),
17802 LTU, 0, counter_mode (count_exp), 1, label);
17803 if (expected_size == -1 || expected_size < epilogue_size_needed)
17804 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17806 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17810 /* Emit code to decide on runtime whether library call or inline should be
17812 if (dynamic_check != -1)
17814 if (CONST_INT_P (count_exp))
17816 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17818 emit_block_move_via_libcall (dst, src, count_exp, false);
17819 count_exp = const0_rtx;
17825 rtx hot_label = gen_label_rtx ();
17826 jump_around_label = gen_label_rtx ();
17827 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17828 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17829 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17830 emit_block_move_via_libcall (dst, src, count_exp, false);
17831 emit_jump (jump_around_label);
17832 emit_label (hot_label);
17836 /* Step 2: Alignment prologue. */
17838 if (desired_align > align)
17840 if (align_bytes == 0)
17842 /* Except for the first move in epilogue, we no longer know
17843 constant offset in aliasing info. It doesn't seem worth
17844 the pain to maintain it for the first move, so throw away
17846 src = change_address (src, BLKmode, srcreg);
17847 dst = change_address (dst, BLKmode, destreg);
17848 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17853 /* If we know how many bytes need to be stored before dst is
17854 sufficiently aligned, maintain aliasing info accurately. */
17855 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
17856 desired_align, align_bytes);
17857 count_exp = plus_constant (count_exp, -align_bytes);
17858 count -= align_bytes;
17860 if (need_zero_guard
17861 && (count < (unsigned HOST_WIDE_INT) size_needed
17862 || (align_bytes == 0
17863 && count < ((unsigned HOST_WIDE_INT) size_needed
17864 + desired_align - align))))
17866 /* It is possible that we copied enough so the main loop will not
17868 gcc_assert (size_needed > 1);
17869 if (label == NULL_RTX)
17870 label = gen_label_rtx ();
17871 emit_cmp_and_jump_insns (count_exp,
17872 GEN_INT (size_needed),
17873 LTU, 0, counter_mode (count_exp), 1, label);
17874 if (expected_size == -1
17875 || expected_size < (desired_align - align) / 2 + size_needed)
17876 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17878 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17881 if (label && size_needed == 1)
17883 emit_label (label);
17884 LABEL_NUSES (label) = 1;
17886 epilogue_size_needed = 1;
17888 else if (label == NULL_RTX)
17889 epilogue_size_needed = size_needed;
17891 /* Step 3: Main loop. */
17897 gcc_unreachable ();
17899 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17900 count_exp, QImode, 1, expected_size);
17903 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17904 count_exp, Pmode, 1, expected_size);
17906 case unrolled_loop:
17907 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17908 registers for 4 temporaries anyway. */
17909 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17910 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17913 case rep_prefix_8_byte:
17914 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17917 case rep_prefix_4_byte:
17918 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17921 case rep_prefix_1_byte:
17922 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17926 /* Adjust properly the offset of src and dest memory for aliasing. */
17927 if (CONST_INT_P (count_exp))
17929 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17930 (count / size_needed) * size_needed);
17931 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17932 (count / size_needed) * size_needed);
17936 src = change_address (src, BLKmode, srcreg);
17937 dst = change_address (dst, BLKmode, destreg);
17940 /* Step 4: Epilogue to copy the remaining bytes. */
17944 /* When the main loop is done, COUNT_EXP might hold original count,
17945 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17946 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17947 bytes. Compensate if needed. */
17949 if (size_needed < epilogue_size_needed)
17952 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17953 GEN_INT (size_needed - 1), count_exp, 1,
17955 if (tmp != count_exp)
17956 emit_move_insn (count_exp, tmp);
17958 emit_label (label);
17959 LABEL_NUSES (label) = 1;
17962 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17963 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17964 epilogue_size_needed);
17965 if (jump_around_label)
17966 emit_label (jump_around_label);
17970 /* Helper function for memcpy. For QImode value 0xXY produce
17971 0xXYXYXYXY of width specified by MODE. This is essentially
17972 a * 0x10101010, but we can do slightly better than
17973 synth_mult by unwinding the sequence by hand on CPUs with
17976 promote_duplicated_reg (enum machine_mode mode, rtx val)
17978 enum machine_mode valmode = GET_MODE (val);
17980 int nops = mode == DImode ? 3 : 2;
/* Only full-word duplication targets are supported.  */
17982 gcc_assert (mode == SImode || mode == DImode);
17983 if (val == const0_rtx)
17984 return copy_to_mode_reg (mode, const0_rtx);
/* For a compile-time-constant byte, build the replicated constant
   directly instead of emitting shifts at runtime.  */
17985 if (CONST_INT_P (val))
17987 HOST_WIDE_INT v = INTVAL (val) & 255;
17991 if (mode == DImode)
17992 v |= (v << 16) << 16;
17993 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17996 if (valmode == VOIDmode)
17998 if (valmode != QImode)
17999 val = gen_lowpart (QImode, val);
18000 if (mode == QImode)
18002 if (!TARGET_PARTIAL_REG_STALL)
/* Choose between a multiply by the replication mask and the manual
   shift/or sequence, based on the target cost model.  */
18004 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18005 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18006 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18007 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18009 rtx reg = convert_modes (mode, QImode, val, true);
18010 tmp = promote_duplicated_reg (mode, const1_rtx);
18011 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18016 rtx reg = convert_modes (mode, QImode, val, true);
18018 if (!TARGET_PARTIAL_REG_STALL)
18019 if (mode == SImode)
18020 emit_insn (gen_movsi_insv_1 (reg, reg));
18022 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18025 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18026 NULL, 1, OPTAB_DIRECT);
18028 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18030 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18031 NULL, 1, OPTAB_DIRECT);
18032 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* The final shift-by-32 merge of the two 32-bit halves only applies to
   the wider (DImode) mode; a 32-bit shift of an SImode value by its own
   width would be invalid.  Fixed: this guard previously tested
   mode == SImode, which inverted the condition.  */
18033 if (mode != SImode)
18035 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18036 NULL, 1, OPTAB_DIRECT);
18037 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18042 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18043 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18044 alignment from ALIGN to DESIRED_ALIGN. */
18046 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest duplication mode any emitted code may touch; when single
   bytes suffice, return VAL unchanged.  */
18051 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18052 promoted_val = promote_duplicated_reg (DImode, val);
18053 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18054 promoted_val = promote_duplicated_reg (SImode, val);
18055 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18056 promoted_val = promote_duplicated_reg (HImode, val);
18058 promoted_val = val;
18060 return promoted_val;
18063 /* Expand string clear operation (bzero). Use i386 string operations when
18064 profitable. See expand_movmem comment for explanation of individual
18065 steps performed. */
/* DST is a BLKmode MEM, VAL_EXP the fill byte, COUNT_EXP the byte count;
   the EXPECTED_* arguments come from profile feedback.  NOTE(review):
   return statements and some interior braces/case labels are elided in
   this extract -- confirm against the complete source.  */
18067 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18068 rtx expected_align_exp, rtx expected_size_exp)
18073 rtx jump_around_label = NULL;
18074 HOST_WIDE_INT align = 1;
18075 unsigned HOST_WIDE_INT count = 0;
18076 HOST_WIDE_INT expected_size = -1;
18077 int size_needed = 0, epilogue_size_needed;
18078 int desired_align = 0, align_bytes = 0;
18079 enum stringop_alg alg;
18080 rtx promoted_val = NULL;
18081 bool force_loopy_epilogue = false;
18083 bool need_zero_guard = false;
18085 if (CONST_INT_P (align_exp))
18086 align = INTVAL (align_exp);
18087 /* i386 can do misaligned access on reasonably increased cost. */
18088 if (CONST_INT_P (expected_align_exp)
18089 && INTVAL (expected_align_exp) > align)
18090 align = INTVAL (expected_align_exp);
18091 if (CONST_INT_P (count_exp))
18092 count = expected_size = INTVAL (count_exp);
18093 if (CONST_INT_P (expected_size_exp) && count == 0)
18094 expected_size = INTVAL (expected_size_exp);
18096 /* Make sure we don't need to care about overflow later on. */
18097 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18100 /* Step 0: Decide on preferred algorithm, desired alignment and
18101 size of chunks to be copied by main loop. */
18103 alg = decide_alg (count, expected_size, true, &dynamic_check);
18104 desired_align = decide_alignment (align, alg, expected_size);
18106 if (!TARGET_ALIGN_STRINGOPS)
18107 align = desired_align;
18109 if (alg == libcall)
18111 gcc_assert (alg != no_stringop);
18113 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18114 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size; loop algorithms need a zero guard since a
   zero count would otherwise wrap.  */
18119 gcc_unreachable ();
18121 need_zero_guard = true;
18122 size_needed = GET_MODE_SIZE (Pmode);
18124 case unrolled_loop:
18125 need_zero_guard = true;
18126 size_needed = GET_MODE_SIZE (Pmode) * 4;
18128 case rep_prefix_8_byte:
18131 case rep_prefix_4_byte:
18134 case rep_prefix_1_byte:
18138 need_zero_guard = true;
18142 epilogue_size_needed = size_needed;
18144 /* Step 1: Prologue guard. */
18146 /* Alignment code needs count to be in register. */
18147 if (CONST_INT_P (count_exp) && desired_align > align)
18149 if (INTVAL (count_exp) > desired_align
18150 && INTVAL (count_exp) > size_needed)
18153 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18154 if (align_bytes <= 0)
18157 align_bytes = desired_align - align_bytes;
18159 if (align_bytes == 0)
18161 enum machine_mode mode = SImode;
18162 if (TARGET_64BIT && (count & ~0xffffffff))
18164 count_exp = force_reg (mode, count_exp);
18167 /* Do the cheap promotion to allow better CSE across the
18168 main loop and epilogue (i.e. one load of the big constant at the
18169 front of all code). */
18170 if (CONST_INT_P (val_exp))
18171 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18172 desired_align, align);
18173 /* Ensure that alignment prologue won't copy past end of block. */
18174 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18176 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18177 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18178 Make sure it is power of 2. */
18179 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18181 /* To improve performance of small blocks, we jump around the VAL
18182 promoting mode. This means that if the promoted VAL is not constant,
18183 we might not use it in the epilogue and have to use byte
18185 if (epilogue_size_needed > 2 && !promoted_val)
18186 force_loopy_epilogue = true;
18189 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18191 /* If main algorithm works on QImode, no epilogue is needed.
18192 For small sizes just don't align anything. */
18193 if (size_needed == 1)
18194 desired_align = align;
18201 label = gen_label_rtx ();
18202 emit_cmp_and_jump_insns (count_exp,
18203 GEN_INT (epilogue_size_needed),
18204 LTU, 0, counter_mode (count_exp), 1, label);
18205 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18206 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18208 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime choice between libcall and inline expansion for large blocks.  */
18211 if (dynamic_check != -1)
18213 rtx hot_label = gen_label_rtx ();
18214 jump_around_label = gen_label_rtx ();
18215 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18216 LEU, 0, counter_mode (count_exp), 1, hot_label);
18217 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18218 set_storage_via_libcall (dst, count_exp, val_exp, false);
18219 emit_jump (jump_around_label);
18220 emit_label (hot_label);
18223 /* Step 2: Alignment prologue. */
18225 /* Do the expensive promotion once we branched off the small blocks. */
18227 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18228 desired_align, align);
18229 gcc_assert (desired_align >= 1 && align >= 1);
18231 if (desired_align > align)
18233 if (align_bytes == 0)
18235 /* Except for the first move in epilogue, we no longer know
18236 constant offset in aliasing info. It doesn't seem worth
18237 the pain to maintain it for the first move, so throw away
18239 dst = change_address (dst, BLKmode, destreg);
18240 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18245 /* If we know how many bytes need to be stored before dst is
18246 sufficiently aligned, maintain aliasing info accurately. */
18247 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18248 desired_align, align_bytes);
18249 count_exp = plus_constant (count_exp, -align_bytes);
18250 count -= align_bytes;
18252 if (need_zero_guard
18253 && (count < (unsigned HOST_WIDE_INT) size_needed
18254 || (align_bytes == 0
18255 && count < ((unsigned HOST_WIDE_INT) size_needed
18256 + desired_align - align))))
18258 /* It is possible that we copied enough so the main loop will not
18260 gcc_assert (size_needed > 1);
18261 if (label == NULL_RTX)
18262 label = gen_label_rtx ();
18263 emit_cmp_and_jump_insns (count_exp,
18264 GEN_INT (size_needed),
18265 LTU, 0, counter_mode (count_exp), 1, label);
18266 if (expected_size == -1
18267 || expected_size < (desired_align - align) / 2 + size_needed)
18268 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18270 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18273 if (label && size_needed == 1)
18275 emit_label (label);
18276 LABEL_NUSES (label) = 1;
18278 promoted_val = val_exp;
18279 epilogue_size_needed = 1;
18281 else if (label == NULL_RTX)
18282 epilogue_size_needed = size_needed;
18284 /* Step 3: Main loop. */
18290 gcc_unreachable ();
18292 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18293 count_exp, QImode, 1, expected_size);
18296 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18297 count_exp, Pmode, 1, expected_size);
18299 case unrolled_loop:
18300 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18301 count_exp, Pmode, 4, expected_size);
18303 case rep_prefix_8_byte:
18304 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18307 case rep_prefix_4_byte:
18308 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18311 case rep_prefix_1_byte:
18312 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18316 /* Adjust properly the offset of src and dest memory for aliasing. */
18317 if (CONST_INT_P (count_exp))
18318 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18319 (count / size_needed) * size_needed);
18321 dst = change_address (dst, BLKmode, destreg);
18323 /* Step 4: Epilogue to copy the remaining bytes. */
18327 /* When the main loop is done, COUNT_EXP might hold original count,
18328 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18329 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18330 bytes. Compensate if needed. */
18332 if (size_needed < epilogue_size_needed)
18335 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18336 GEN_INT (size_needed - 1), count_exp, 1,
18338 if (tmp != count_exp)
18339 emit_move_insn (count_exp, tmp);
18341 emit_label (label);
18342 LABEL_NUSES (label) = 1;
18345 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18347 if (force_loopy_epilogue)
18348 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18349 epilogue_size_needed);
18351 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18352 epilogue_size_needed);
18354 if (jump_around_label)
18355 emit_label (jump_around_label);
18359 /* Expand the appropriate insns for doing strlen if not just doing
18362 out = result, initialized with the start address
18363 align_rtx = alignment of the address.
18364 scratch = scratch register, initialized with the start address when
18365 not aligned, otherwise undefined
18367 This is just the body. It needs the initializations mentioned above and
18368 some address computing at the end. These things are done in i386.md. */
/* On exit OUT holds the address of the terminating zero byte (callers
   subtract the start address to get the length -- see ix86_expand_strlen).  */
18371 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18375 rtx align_2_label = NULL_RTX;
18376 rtx align_3_label = NULL_RTX;
18377 rtx align_4_label = gen_label_rtx ();
18378 rtx end_0_label = gen_label_rtx ();
18380 rtx tmpreg = gen_reg_rtx (SImode);
18381 rtx scratch = gen_reg_rtx (SImode);
18385 if (CONST_INT_P (align_rtx))
18386 align = INTVAL (align_rtx);
18388 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18390 /* Is there a known alignment and is it less than 4? */
18393 rtx scratch1 = gen_reg_rtx (Pmode);
18394 emit_move_insn (scratch1, out);
18395 /* Is there a known alignment and is it not 2? */
18398 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18399 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18401 /* Leave just the 3 lower bits. */
18402 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18403 NULL_RTX, 0, OPTAB_WIDEN);
18405 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18406 Pmode, 1, align_4_label);
18407 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18408 Pmode, 1, align_2_label);
18409 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18410 Pmode, 1, align_3_label);
18414 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18415 check if is aligned to 4 - byte. */
18417 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18418 NULL_RTX, 0, OPTAB_WIDEN);
18420 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18421 Pmode, 1, align_4_label);
18424 mem = change_address (src, QImode, out);
18426 /* Now compare the bytes. */
18428 /* Compare the first n unaligned byte on a byte per byte basis. */
18429 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18430 QImode, 1, end_0_label);
18432 /* Increment the address. */
18433 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18435 /* Not needed with an alignment of 2 */
18438 emit_label (align_2_label);
18440 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18443 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18445 emit_label (align_3_label);
18448 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18451 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18454 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18455 align this loop. It gives only huge programs, but does not help to
18457 emit_label (align_4_label);
18459 mem = change_address (src, SImode, out);
18460 emit_move_insn (scratch, mem);
18461 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18463 /* This formula yields a nonzero result iff one of the bytes is zero.
18464 This saves three branches inside loop and many cycles. */
18466 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18467 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18468 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18469 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18470 gen_int_mode (0x80808080, SImode)));
18471 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found somewhere in the word; locate it branch-free
   using conditional moves (this arm) or a test-and-jump (below).  */
18476 rtx reg = gen_reg_rtx (SImode);
18477 rtx reg2 = gen_reg_rtx (Pmode);
18478 emit_move_insn (reg, tmpreg);
18479 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18481 /* If zero is not in the first two bytes, move two bytes forward. */
18482 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18483 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18484 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18485 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18486 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18489 /* Emit lea manually to avoid clobbering of flags. */
18490 emit_insn (gen_rtx_SET (SImode, reg2,
18491 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18493 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18494 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18495 emit_insn (gen_rtx_SET (VOIDmode, out,
18496 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18503 rtx end_2_label = gen_label_rtx ();
18504 /* Is zero in the first two bytes? */
18506 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18507 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18508 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18509 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18510 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18512 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18513 JUMP_LABEL (tmp) = end_2_label;
18515 /* Not in the first two. Move two bytes forward. */
18516 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18517 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18519 emit_label (end_2_label);
18523 /* Avoid branch in fixing the byte. */
18524 tmpreg = gen_lowpart (QImode, tmpreg);
18525 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18526 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18527 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18529 emit_label (end_0_label);
18532 /* Expand strlen. */
/* OUT receives the length of the string at SRC; EOSCHAR is the terminator
   (the unrolled path requires it to be zero) and ALIGN the known alignment.
   Falls back to a repne-scasb (UNSPEC_SCAS) sequence otherwise.
   NOTE(review): return statements are elided in this extract -- confirm the
   success/failure return convention against the full source.  */
18535 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18537 rtx addr, scratch1, scratch2, scratch3, scratch4;
18539 /* The generic case of strlen expander is long. Avoid its
18540 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
18542 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18543 && !TARGET_INLINE_ALL_STRINGOPS
18544 && !optimize_insn_for_size_p ()
18545 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18548 addr = force_reg (Pmode, XEXP (src, 0));
18549 scratch1 = gen_reg_rtx (Pmode);
18551 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18552 && !optimize_insn_for_size_p ())
18554 /* Well it seems that some optimizer does not combine a call like
18555 foo(strlen(bar), strlen(bar));
18556 when the move and the subtraction is done here. It does calculate
18557 the length just once when these instructions are done inside of
18558 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18559 often used and I use one fewer register for the lifetime of
18560 output_strlen_unroll() this is better. */
18562 emit_move_insn (out, addr);
18564 ix86_expand_strlensi_unroll_1 (out, src, align);
18566 /* strlensi_unroll_1 returns the address of the zero at the end of
18567 the string, like memchr(), so compute the length by subtracting
18568 the start address. */
18569 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18575 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18576 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18579 scratch2 = gen_reg_rtx (Pmode);
18580 scratch3 = gen_reg_rtx (Pmode);
18581 scratch4 = force_reg (Pmode, constm1_rtx);
18583 emit_move_insn (scratch3, addr);
18584 eoschar = force_reg (QImode, eoschar);
18586 src = replace_equiv_address_nv (src, scratch3);
18588 /* If .md starts supporting :P, this can be done in .md. */
18589 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18590 scratch4), UNSPEC_SCAS);
18591 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18592 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18593 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18598 /* For given symbol (function) construct code to compute address of its PLT
18599 entry in large x86-64 PIC model. */
/* Emits a move of the PLTOFF constant into a fresh register and adds the
   PIC register to it; only valid with ix86_cmodel == CM_LARGE_PIC.  */
18601 construct_plt_address (rtx symbol)
18603 rtx tmp = gen_reg_rtx (Pmode);
18604 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18606 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18607 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18609 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18610 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx))
/* Emit the RTL for a call to FNADDR, passing CALLARG1 as the argument
   bytes rtx.  CALLARG2 encodes ABI information (-2 selects MS_ABI below;
   on 64-bit a non-negative value is the number of SSE registers used,
   loaded into AL for varargs).  POP is the number of bytes the callee
   pops, SIBCALL nonzero for a tail call.  */
18615 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18617 rtx pop, int sibcall)
18619 rtx use = NULL, call;
18620 enum calling_abi function_call_abi;
18622 if (callarg2 && INTVAL (callarg2) == -2)
18623 function_call_abi = MS_ABI;
18625 function_call_abi = SYSV_ABI;
18626 if (pop == const0_rtx)
18628 gcc_assert (!TARGET_64BIT || !pop);
18630 if (TARGET_MACHO && !TARGET_64BIT)
18633 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18634 fnaddr = machopic_indirect_call_target (fnaddr);
18639 /* Static functions and indirect calls don't need the pic register. */
18640 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18641 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18642 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18643 use_reg (&use, pic_offset_table_rtx);
18646 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18648 rtx al = gen_rtx_REG (QImode, AX_REG);
18649 emit_move_insn (al, callarg2);
18650 use_reg (&use, al);
/* Route calls to non-local symbols through the PLT in the large PIC
   model; otherwise force non-operand addresses into a register.  */
18653 if (ix86_cmodel == CM_LARGE_PIC
18654 && GET_CODE (fnaddr) == MEM
18655 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18656 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18657 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18658 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18660 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18661 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses go through R11 (a
   call-clobbered register not used for argument passing).  */
18663 if (sibcall && TARGET_64BIT
18664 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18667 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18668 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18669 emit_move_insn (fnaddr, addr);
18670 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18673 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18675 call = gen_rtx_SET (VOIDmode, retval, call);
18678 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18679 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18680 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18681 gcc_assert (ix86_cfun_abi () != MS_ABI || function_call_abi != SYSV_ABI);
18683 /* We need to represent that SI and DI registers are clobbered
18685 if (ix86_cfun_abi () == MS_ABI && function_call_abi == SYSV_ABI)
18687 static int clobbered_registers[] = {27, 28, 45, 46, 47, 48, 49, 50, 51,
18688 52, SI_REG, DI_REG};
18690 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18691 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18692 UNSPEC_MS_TO_SYSV_CALL);
18696 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18697 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18700 (SSE_REGNO_P (clobbered_registers[i])
18702 clobbered_registers[i]));
18704 call = gen_rtx_PARALLEL (VOIDmode,
18705 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18709 call = emit_call_insn (call);
18711 CALL_INSN_FUNCTION_USAGE (call) = use;
18715 /* Clear stack slot assignments remembered from previous functions.
18716 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and zero-initializes (via GGC_CNEW) a fresh per-function
   machine_function, then sets the fields with non-zero defaults.  */
18719 static struct machine_function *
18720 ix86_init_machine_status (void)
18722 struct machine_function *f;
18724 f = GGC_CNEW (struct machine_function);
18725 f->use_fast_prologue_epilogue_nregs = -1;
18726 f->tls_descriptor_call_expanded_p = 0;
18727 f->call_abi = DEFAULT_ABI;
18732 /* Return a MEM corresponding to a stack slot with mode MODE.
18733 Allocate a new slot if necessary.
18735 The RTL for a function can have several slots available: N is
18736 which slot to use. */
18739 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18741 struct stack_local_entry *s;
18743 gcc_assert (n < MAX_386_STACK_LOCALS);
18745 /* Virtual slot is valid only before vregs are instantiated. */
18746 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot if one keyed by the same (mode, n) pair was
   already handed out for this function; return a copy of its MEM.  */
18748 for (s = ix86_stack_locals; s; s = s->next)
18749 if (s->mode == mode && s->n == n)
18750 return copy_rtx (s->rtl);
/* Otherwise allocate a fresh slot and push a new cache entry onto the
   per-function ix86_stack_locals list.  */
18752 s = (struct stack_local_entry *)
18753 ggc_alloc (sizeof (struct stack_local_entry));
18756 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18758 s->next = ix86_stack_locals;
18759 ix86_stack_locals = s;
18763 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18765 static GTY(()) rtx ix86_tls_symbol;
18767 ix86_tls_get_addr (void)
/* Created lazily on first use and cached; GTY(()) keeps the cached rtx
   alive across garbage collections.  */
18770 if (!ix86_tls_symbol)
/* GNU TLS dialects call the triple-underscore entry point, the generic
   dialect the double-underscore one (see the conditional below).  */
18772 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18773 (TARGET_ANY_GNU_TLS
18775 ? "___tls_get_addr"
18776 : "__tls_get_addr");
18779 return ix86_tls_symbol;
18782 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18784 static GTY(()) rtx ix86_tls_module_base_symbol;
18786 ix86_tls_module_base (void)
/* Lazily created and cached, like ix86_tls_get_addr above.  */
18789 if (!ix86_tls_module_base_symbol)
18791 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18792 "_TLS_MODULE_BASE_");
/* Tag the symbol with the global-dynamic TLS model in its flags so
   later passes treat it as a TLS reference.  */
18793 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18794 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18797 return ix86_tls_module_base_symbol;
18800 /* Calculate the length of the memory address in the instruction
18801 encoding. Does not include the one-byte modrm, opcode, or prefix. */
18804 memory_address_length (rtx addr)
18806 struct ix86_address parts;
18807 rtx base, index, disp;
/* Auto-increment/decrement forms (push/pop style addressing) are
   handled by this early test.  NOTE(review): the branch body is elided
   in this view -- presumably it returns 0; confirm.  */
18811 if (GET_CODE (addr) == PRE_DEC
18812 || GET_CODE (addr) == POST_INC
18813 || GET_CODE (addr) == PRE_MODIFY
18814 || GET_CODE (addr) == POST_MODIFY)
18817 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the base/index comparisons below see the underlying
   registers.  */
18820 if (parts.base && GET_CODE (parts.base) == SUBREG)
18821 parts.base = SUBREG_REG (parts.base);
18822 if (parts.index && GET_CODE (parts.index) == SUBREG)
18823 parts.index = SUBREG_REG (parts.index);
18826 index = parts.index;
18831 - esp as the base always wants an index,
18832 - ebp as the base always wants a displacement. */
18834 /* Register Indirect. */
18835 if (base && !index && !disp)
18837 /* esp (for its index) and ebp (for its displacement) need
18838 the two-byte modrm form. */
18839 if (addr == stack_pointer_rtx
18840 || addr == arg_pointer_rtx
18841 || addr == frame_pointer_rtx
18842 || addr == hard_frame_pointer_rtx)
18846 /* Direct Addressing. */
18847 else if (disp && !base && !index)
18852 /* Find the length of the displacement constant. */
/* The i386 'K' constraint accepts signed 8-bit constants, so such a
   displacement needs only one byte instead of four.  */
18855 if (base && satisfies_constraint_K (disp))
18860 /* ebp always wants a displacement. */
18861 else if (base == hard_frame_pointer_rtx)
18864 /* An index requires the two-byte modrm form.... */
18866 /* ...like esp, which always wants an index. */
18867 || base == stack_pointer_rtx
18868 || base == arg_pointer_rtx
18869 || base == frame_pointer_rtx)
18876 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18877 is set, expect that insn have 8bit immediate alternative. */
18879 ix86_attr_length_immediate_default (rtx insn, int shortform)
18883 extract_insn_cached (insn);
/* Scan every operand; constant operands are the candidate immediates.  */
18884 for (i = recog_data.n_operands - 1; i >= 0; --i)
18885 if (CONSTANT_P (recog_data.operand[i]))
/* When a short form exists and the constant fits a signed 8-bit
   immediate (i386 'K' constraint), only one byte is needed.  */
18888 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate width follows the insn's mode attribute.  */
18892 switch (get_attr_mode (insn))
18903 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18908 fatal_insn ("unknown insn mode", insn);
18914 /* Compute default value for "length_address" attribute. */
18916 ix86_attr_length_address_default (rtx insn)
/* LEA carries its address expression in SET_SRC rather than inside a
   MEM operand, so handle it specially.  */
18920 if (get_attr_type (insn) == TYPE_LEA)
18922 rtx set = PATTERN (insn);
18924 if (GET_CODE (set) == PARALLEL)
18925 set = XVECEXP (set, 0, 0);
18927 gcc_assert (GET_CODE (set) == SET);
18929 return memory_address_length (SET_SRC (set));
/* Otherwise measure the address of a MEM operand, if the insn has one.  */
18932 extract_insn_cached (insn);
18933 for (i = recog_data.n_operands - 1; i >= 0; --i)
18934 if (MEM_P (recog_data.operand[i]))
18936 return memory_address_length (XEXP (recog_data.operand[i], 0));
18942 /* Compute default value for "length_vex" attribute. It includes
18943 2 or 3 byte VEX prefix and 1 opcode byte. */
18946 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18951 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18952 byte VEX prefix. */
18953 if (!has_0f_opcode || has_vex_w)
18956 /* We can always use 2 byte VEX prefix in 32bit. */
/* In 64-bit mode, scan the operands for anything that forces the
   3-byte VEX prefix form.  */
18960 extract_insn_cached (insn);
18962 for (i = recog_data.n_operands - 1; i >= 0; --i)
18963 if (REG_P (recog_data.operand[i]))
18965 /* REX.W bit uses 3 byte VEX prefix. */
18966 if (GET_MODE (recog_data.operand[i]) == DImode)
18971 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18972 if (MEM_P (recog_data.operand[i])
18973 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18980 /* Return the maximum number of instructions a cpu can issue. */
18983 ix86_issue_rate (void)
/* Dispatch on the tuned-for processor.  NOTE(review): the per-case
   return values are elided in this view.  */
18987 case PROCESSOR_PENTIUM:
18991 case PROCESSOR_PENTIUMPRO:
18992 case PROCESSOR_PENTIUM4:
18993 case PROCESSOR_ATHLON:
18995 case PROCESSOR_AMDFAM10:
18996 case PROCESSOR_NOCONA:
18997 case PROCESSOR_GENERIC32:
18998 case PROCESSOR_GENERIC64:
19001 case PROCESSOR_CORE2:
19009 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19010 by DEP_INSN and nothing set by DEP_INSN. */
19013 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19017 /* Simplify the test for uninteresting insns. */
19018 if (insn_type != TYPE_SETCC
19019 && insn_type != TYPE_ICMOV
19020 && insn_type != TYPE_FCMOV
19021 && insn_type != TYPE_IBR)
19024 if ((set = single_set (dep_insn)) != 0)
19026 set = SET_DEST (set);
19029 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19030 && XVECLEN (PATTERN (dep_insn), 0) == 2
19031 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19032 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19034 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19035 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19040 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19043 /* This test is true if the dependent insn reads the flags but
19044 not any other potentially set register. */
19045 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19048 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19054 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19055 address with operands set by DEP_INSN. */
19058 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* LEA computes an address in its SET_SRC, so that expression is the
   "memory address" to check against DEP_INSN.  */
19062 if (insn_type == TYPE_LEA
19065 addr = PATTERN (insn);
19067 if (GET_CODE (addr) == PARALLEL)
19068 addr = XVECEXP (addr, 0, 0);
19070 gcc_assert (GET_CODE (addr) == SET);
19072 addr = SET_SRC (addr);
/* Otherwise find a MEM operand and take its address expression.  */
19077 extract_insn_cached (insn);
19078 for (i = recog_data.n_operands - 1; i >= 0; --i)
19079 if (MEM_P (recog_data.operand[i]))
19081 addr = XEXP (recog_data.operand[i], 0);
/* An address-generation interlock exists iff DEP_INSN modifies
   something the address expression uses.  */
19088 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK
   between INSN and DEP_INSN according to the tuned-for processor.  */
19092 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19094 enum attr_type insn_type, dep_insn_type;
19095 enum attr_memory memory;
19097 int dep_insn_code_number;
19099 /* Anti and output dependencies have zero cost on all CPUs. */
19100 if (REG_NOTE_KIND (link) != 0)
19103 dep_insn_code_number = recog_memoized (dep_insn);
19105 /* If we can't recognize the insns, we can't really do anything. */
19106 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19109 insn_type = get_attr_type (insn);
19110 dep_insn_type = get_attr_type (dep_insn);
/* Per-processor latency adjustments follow.  NOTE(review): several
   branch bodies and the cost arithmetic are elided in this view.  */
19114 case PROCESSOR_PENTIUM:
19115 /* Address Generation Interlock adds a cycle of latency. */
19116 if (ix86_agi_dependent (insn, dep_insn, insn_type))
19119 /* ??? Compares pair with jump/setcc. */
19120 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19123 /* Floating point stores require value to be ready one cycle earlier. */
19124 if (insn_type == TYPE_FMOV
19125 && get_attr_memory (insn) == MEMORY_STORE
19126 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19130 case PROCESSOR_PENTIUMPRO:
19131 memory = get_attr_memory (insn)
19133 /* INT->FP conversion is expensive. */
19134 if (get_attr_fp_int_src (dep_insn))
19137 /* There is one cycle extra latency between an FP op and a store. */
19138 if (insn_type == TYPE_FMOV
19139 && (set = single_set (dep_insn)) != NULL_RTX
19140 && (set2 = single_set (insn)) != NULL_RTX
19141 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19142 && MEM_P (SET_DEST (set2)))
19145 /* Show ability of reorder buffer to hide latency of load by executing
19146 in parallel with previous instruction in case
19147 previous instruction is not needed to compute the address. */
19148 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19149 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19151 /* Claim moves to take one cycle, as core can issue one load
19152 at time and the next load can start cycle later. */
19153 if (dep_insn_type == TYPE_IMOV
19154 || dep_insn_type == TYPE_FMOV)
19162 memory = get_attr_memory (insn);
19164 /* The esp dependency is resolved before the instruction is really
19166 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19167 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19170 /* INT->FP conversion is expensive. */
19171 if (get_attr_fp_int_src (dep_insn))
19174 /* Show ability of reorder buffer to hide latency of load by executing
19175 in parallel with previous instruction in case
19176 previous instruction is not needed to compute the address. */
19177 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19178 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19180 /* Claim moves to take one cycle, as core can issue one load
19181 at time and the next load can start cycle later. */
19182 if (dep_insn_type == TYPE_IMOV
19183 || dep_insn_type == TYPE_FMOV)
19192 case PROCESSOR_ATHLON:
19194 case PROCESSOR_AMDFAM10:
19195 case PROCESSOR_GENERIC32:
19196 case PROCESSOR_GENERIC64:
19197 memory = get_attr_memory (insn);
19199 /* Show ability of reorder buffer to hide latency of load by executing
19200 in parallel with previous instruction in case
19201 previous instruction is not needed to compute the address. */
19202 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19203 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19205 enum attr_unit unit = get_attr_unit (insn);
19208 /* Because of the difference between the length of integer and
19209 floating unit pipeline preparation stages, the memory operands
19210 for floating point are cheaper.
19212 ??? For Athlon it the difference is most probably 2. */
19213 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19216 loadcost = TARGET_ATHLON ? 2 : 0;
19218 if (cost >= loadcost)
19231 /* How many alternative schedules to try. This should be as wide as the
19232 scheduling freedom in the DFA, but no wider. Making this value too
19233 large results extra work for the scheduler. */
19236 ia32_multipass_dfa_lookahead (void)
/* Per-processor lookahead depth.  NOTE(review): the returned values
   and the default case are elided in this view.  */
19240 case PROCESSOR_PENTIUM:
19243 case PROCESSOR_PENTIUMPRO:
19253 /* Compute the alignment given to a constant that is being placed in memory.
19254 EXP is the constant and ALIGN is the alignment that the object would
19256 The value of this function is used instead of that alignment to align
19260 ix86_constant_alignment (tree exp, int align)
/* Numeric/vector constants get boosted to their mode's natural
   alignment: 64 bits for DFmode, 128 bits for 128-bit modes.  */
19262 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19263 || TREE_CODE (exp) == INTEGER_CST)
19265 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19267 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment (helps block copies),
   but not when optimizing for size.  */
19270 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19271 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19272 return BITS_PER_WORD;
19277 /* Compute the alignment for a static variable.
19278 TYPE is the data type, and ALIGN is the alignment that
19279 the object would ordinarily have. The value of this function is used
19280 instead of that alignment to align the object. */
19283 ix86_data_alignment (tree type, int align)
/* Cap the boost: word alignment when optimizing for size, otherwise at
   most 256 bits, bounded by what the object file format supports.  */
19285 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates (size >= max_align bits, or with a nonzero high
   word in the size constant) get the full max_align.  */
19287 if (AGGREGATE_TYPE_P (type)
19288 && TYPE_SIZE (type)
19289 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19290 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19291 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19292 && align < max_align)
19295 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19296 to 16byte boundary. */
19299 if (AGGREGATE_TYPE_P (type)
19300 && TYPE_SIZE (type)
19301 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19302 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19303 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Mode-based boosts: 64 bits for DFmode-like data, 128 bits for
   128-bit modes, applied per type category below.  */
19307 if (TREE_CODE (type) == ARRAY_TYPE)
19309 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19311 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19314 else if (TREE_CODE (type) == COMPLEX_TYPE)
19317 if (TYPE_MODE (type) == DCmode && align < 64)
19319 if ((TYPE_MODE (type) == XCmode
19320 || TYPE_MODE (type) == TCmode) && align < 128)
19323 else if ((TREE_CODE (type) == RECORD_TYPE
19324 || TREE_CODE (type) == UNION_TYPE
19325 || TREE_CODE (type) == QUAL_UNION_TYPE)
19326 && TYPE_FIELDS (type))
/* For records/unions, key the boost off the first field's mode.  */
19328 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19330 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19333 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19334 || TREE_CODE (type) == INTEGER_TYPE)
19336 if (TYPE_MODE (type) == DFmode && align < 64)
19338 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19345 /* Compute the alignment for a local variable or a stack slot. TYPE is
19346 the data type, MODE is the widest mode available and ALIGN is the
19347 alignment that the object would ordinarily have. The value of this
19348 macro is used instead of that alignment to align the object. */
19351 ix86_local_alignment (tree type, enum machine_mode mode,
19352 unsigned int align)
19354 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19355 register in MODE. We will return the largest alignment of XF
19359 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19360 align = GET_MODE_ALIGNMENT (DFmode);
19364 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19365 to 16byte boundary. */
/* NOTE(review): the size threshold here is ">= 16" (bits), which looks
   inconsistent both with the ">= 128" test used by ix86_data_alignment
   and with the "greater than 16 bytes" wording above -- confirm whether
   this should be 128.  */
19368 if (AGGREGATE_TYPE_P (type)
19369 && TYPE_SIZE (type)
19370 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19371 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19372 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Mode-based boosts mirror ix86_data_alignment: 64 bits for DFmode
   data, 128 bits for 128-bit modes, per type category.  */
19375 if (TREE_CODE (type) == ARRAY_TYPE)
19377 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19379 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19382 else if (TREE_CODE (type) == COMPLEX_TYPE)
19384 if (TYPE_MODE (type) == DCmode && align < 64)
19386 if ((TYPE_MODE (type) == XCmode
19387 || TYPE_MODE (type) == TCmode) && align < 128)
19390 else if ((TREE_CODE (type) == RECORD_TYPE
19391 || TREE_CODE (type) == UNION_TYPE
19392 || TREE_CODE (type) == QUAL_UNION_TYPE)
19393 && TYPE_FIELDS (type))
19395 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19397 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19400 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19401 || TREE_CODE (type) == INTEGER_TYPE)
19404 if (TYPE_MODE (type) == DFmode && align < 64)
19406 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19412 /* Emit RTL insns to initialize the variable parts of a trampoline.
19413 FNADDR is an RTX for the address of the function's pure code.
19414 CXT is an RTX for the static chain value for the function. */
19416 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: mov $cxt, %ecx; jmp fnaddr (rel32).  */
19420 /* Compute offset from the end of the jmp to the target function. */
19421 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19422 plus_constant (tramp, 10),
19423 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = mov imm32 to %ecx (static chain register).  */
19424 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19425 gen_int_mode (0xb9, QImode));
19426 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = jmp rel32, followed by the displacement computed above.  */
19427 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19428 gen_int_mode (0xe9, QImode));
19429 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit trampoline: load FNADDR into r11, CXT into r10, jmp *r11.  */
19434 /* Try to load address using shorter movl instead of movabs.
19435 We may want to support movq for kernel mode, but kernel does not use
19436 trampolines at the moment. */
19437 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19439 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = movl imm32 to %r11d (zero-extends into r11).  */
19440 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19441 gen_int_mode (0xbb41, HImode));
19442 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19443 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = movabs imm64 to %r11 (full 64-bit address).  */
19448 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19449 gen_int_mode (0xbb49, HImode));
19450 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19454 /* Load static chain using movabs to r10. */
/* 0x49 0xba = movabs imm64 to %r10.  */
19455 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19456 gen_int_mode (0xba49, HImode));
19457 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19460 /* Jump to the r11 */
/* 0x49 0xff 0xe3 = jmp *%r11.  */
19461 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19462 gen_int_mode (0xff49, HImode));
19463 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19464 gen_int_mode (0xe3, QImode));
19466 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets needing it, mark the trampoline's stack page executable.  */
19469 #ifdef ENABLE_EXECUTE_STACK
19470 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19471 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19475 /* Codes for all the SSE/MMX builtins. */
19478 IX86_BUILTIN_ADDPS,
19479 IX86_BUILTIN_ADDSS,
19480 IX86_BUILTIN_DIVPS,
19481 IX86_BUILTIN_DIVSS,
19482 IX86_BUILTIN_MULPS,
19483 IX86_BUILTIN_MULSS,
19484 IX86_BUILTIN_SUBPS,
19485 IX86_BUILTIN_SUBSS,
19487 IX86_BUILTIN_CMPEQPS,
19488 IX86_BUILTIN_CMPLTPS,
19489 IX86_BUILTIN_CMPLEPS,
19490 IX86_BUILTIN_CMPGTPS,
19491 IX86_BUILTIN_CMPGEPS,
19492 IX86_BUILTIN_CMPNEQPS,
19493 IX86_BUILTIN_CMPNLTPS,
19494 IX86_BUILTIN_CMPNLEPS,
19495 IX86_BUILTIN_CMPNGTPS,
19496 IX86_BUILTIN_CMPNGEPS,
19497 IX86_BUILTIN_CMPORDPS,
19498 IX86_BUILTIN_CMPUNORDPS,
19499 IX86_BUILTIN_CMPEQSS,
19500 IX86_BUILTIN_CMPLTSS,
19501 IX86_BUILTIN_CMPLESS,
19502 IX86_BUILTIN_CMPNEQSS,
19503 IX86_BUILTIN_CMPNLTSS,
19504 IX86_BUILTIN_CMPNLESS,
19505 IX86_BUILTIN_CMPNGTSS,
19506 IX86_BUILTIN_CMPNGESS,
19507 IX86_BUILTIN_CMPORDSS,
19508 IX86_BUILTIN_CMPUNORDSS,
19510 IX86_BUILTIN_COMIEQSS,
19511 IX86_BUILTIN_COMILTSS,
19512 IX86_BUILTIN_COMILESS,
19513 IX86_BUILTIN_COMIGTSS,
19514 IX86_BUILTIN_COMIGESS,
19515 IX86_BUILTIN_COMINEQSS,
19516 IX86_BUILTIN_UCOMIEQSS,
19517 IX86_BUILTIN_UCOMILTSS,
19518 IX86_BUILTIN_UCOMILESS,
19519 IX86_BUILTIN_UCOMIGTSS,
19520 IX86_BUILTIN_UCOMIGESS,
19521 IX86_BUILTIN_UCOMINEQSS,
19523 IX86_BUILTIN_CVTPI2PS,
19524 IX86_BUILTIN_CVTPS2PI,
19525 IX86_BUILTIN_CVTSI2SS,
19526 IX86_BUILTIN_CVTSI642SS,
19527 IX86_BUILTIN_CVTSS2SI,
19528 IX86_BUILTIN_CVTSS2SI64,
19529 IX86_BUILTIN_CVTTPS2PI,
19530 IX86_BUILTIN_CVTTSS2SI,
19531 IX86_BUILTIN_CVTTSS2SI64,
19533 IX86_BUILTIN_MAXPS,
19534 IX86_BUILTIN_MAXSS,
19535 IX86_BUILTIN_MINPS,
19536 IX86_BUILTIN_MINSS,
19538 IX86_BUILTIN_LOADUPS,
19539 IX86_BUILTIN_STOREUPS,
19540 IX86_BUILTIN_MOVSS,
19542 IX86_BUILTIN_MOVHLPS,
19543 IX86_BUILTIN_MOVLHPS,
19544 IX86_BUILTIN_LOADHPS,
19545 IX86_BUILTIN_LOADLPS,
19546 IX86_BUILTIN_STOREHPS,
19547 IX86_BUILTIN_STORELPS,
19549 IX86_BUILTIN_MASKMOVQ,
19550 IX86_BUILTIN_MOVMSKPS,
19551 IX86_BUILTIN_PMOVMSKB,
19553 IX86_BUILTIN_MOVNTPS,
19554 IX86_BUILTIN_MOVNTQ,
19556 IX86_BUILTIN_LOADDQU,
19557 IX86_BUILTIN_STOREDQU,
19559 IX86_BUILTIN_PACKSSWB,
19560 IX86_BUILTIN_PACKSSDW,
19561 IX86_BUILTIN_PACKUSWB,
19563 IX86_BUILTIN_PADDB,
19564 IX86_BUILTIN_PADDW,
19565 IX86_BUILTIN_PADDD,
19566 IX86_BUILTIN_PADDQ,
19567 IX86_BUILTIN_PADDSB,
19568 IX86_BUILTIN_PADDSW,
19569 IX86_BUILTIN_PADDUSB,
19570 IX86_BUILTIN_PADDUSW,
19571 IX86_BUILTIN_PSUBB,
19572 IX86_BUILTIN_PSUBW,
19573 IX86_BUILTIN_PSUBD,
19574 IX86_BUILTIN_PSUBQ,
19575 IX86_BUILTIN_PSUBSB,
19576 IX86_BUILTIN_PSUBSW,
19577 IX86_BUILTIN_PSUBUSB,
19578 IX86_BUILTIN_PSUBUSW,
19581 IX86_BUILTIN_PANDN,
19585 IX86_BUILTIN_PAVGB,
19586 IX86_BUILTIN_PAVGW,
19588 IX86_BUILTIN_PCMPEQB,
19589 IX86_BUILTIN_PCMPEQW,
19590 IX86_BUILTIN_PCMPEQD,
19591 IX86_BUILTIN_PCMPGTB,
19592 IX86_BUILTIN_PCMPGTW,
19593 IX86_BUILTIN_PCMPGTD,
19595 IX86_BUILTIN_PMADDWD,
19597 IX86_BUILTIN_PMAXSW,
19598 IX86_BUILTIN_PMAXUB,
19599 IX86_BUILTIN_PMINSW,
19600 IX86_BUILTIN_PMINUB,
19602 IX86_BUILTIN_PMULHUW,
19603 IX86_BUILTIN_PMULHW,
19604 IX86_BUILTIN_PMULLW,
19606 IX86_BUILTIN_PSADBW,
19607 IX86_BUILTIN_PSHUFW,
19609 IX86_BUILTIN_PSLLW,
19610 IX86_BUILTIN_PSLLD,
19611 IX86_BUILTIN_PSLLQ,
19612 IX86_BUILTIN_PSRAW,
19613 IX86_BUILTIN_PSRAD,
19614 IX86_BUILTIN_PSRLW,
19615 IX86_BUILTIN_PSRLD,
19616 IX86_BUILTIN_PSRLQ,
19617 IX86_BUILTIN_PSLLWI,
19618 IX86_BUILTIN_PSLLDI,
19619 IX86_BUILTIN_PSLLQI,
19620 IX86_BUILTIN_PSRAWI,
19621 IX86_BUILTIN_PSRADI,
19622 IX86_BUILTIN_PSRLWI,
19623 IX86_BUILTIN_PSRLDI,
19624 IX86_BUILTIN_PSRLQI,
19626 IX86_BUILTIN_PUNPCKHBW,
19627 IX86_BUILTIN_PUNPCKHWD,
19628 IX86_BUILTIN_PUNPCKHDQ,
19629 IX86_BUILTIN_PUNPCKLBW,
19630 IX86_BUILTIN_PUNPCKLWD,
19631 IX86_BUILTIN_PUNPCKLDQ,
19633 IX86_BUILTIN_SHUFPS,
19635 IX86_BUILTIN_RCPPS,
19636 IX86_BUILTIN_RCPSS,
19637 IX86_BUILTIN_RSQRTPS,
19638 IX86_BUILTIN_RSQRTPS_NR,
19639 IX86_BUILTIN_RSQRTSS,
19640 IX86_BUILTIN_RSQRTF,
19641 IX86_BUILTIN_SQRTPS,
19642 IX86_BUILTIN_SQRTPS_NR,
19643 IX86_BUILTIN_SQRTSS,
19645 IX86_BUILTIN_UNPCKHPS,
19646 IX86_BUILTIN_UNPCKLPS,
19648 IX86_BUILTIN_ANDPS,
19649 IX86_BUILTIN_ANDNPS,
19651 IX86_BUILTIN_XORPS,
19654 IX86_BUILTIN_LDMXCSR,
19655 IX86_BUILTIN_STMXCSR,
19656 IX86_BUILTIN_SFENCE,
19658 /* 3DNow! Original */
19659 IX86_BUILTIN_FEMMS,
19660 IX86_BUILTIN_PAVGUSB,
19661 IX86_BUILTIN_PF2ID,
19662 IX86_BUILTIN_PFACC,
19663 IX86_BUILTIN_PFADD,
19664 IX86_BUILTIN_PFCMPEQ,
19665 IX86_BUILTIN_PFCMPGE,
19666 IX86_BUILTIN_PFCMPGT,
19667 IX86_BUILTIN_PFMAX,
19668 IX86_BUILTIN_PFMIN,
19669 IX86_BUILTIN_PFMUL,
19670 IX86_BUILTIN_PFRCP,
19671 IX86_BUILTIN_PFRCPIT1,
19672 IX86_BUILTIN_PFRCPIT2,
19673 IX86_BUILTIN_PFRSQIT1,
19674 IX86_BUILTIN_PFRSQRT,
19675 IX86_BUILTIN_PFSUB,
19676 IX86_BUILTIN_PFSUBR,
19677 IX86_BUILTIN_PI2FD,
19678 IX86_BUILTIN_PMULHRW,
19680 /* 3DNow! Athlon Extensions */
19681 IX86_BUILTIN_PF2IW,
19682 IX86_BUILTIN_PFNACC,
19683 IX86_BUILTIN_PFPNACC,
19684 IX86_BUILTIN_PI2FW,
19685 IX86_BUILTIN_PSWAPDSI,
19686 IX86_BUILTIN_PSWAPDSF,
19689 IX86_BUILTIN_ADDPD,
19690 IX86_BUILTIN_ADDSD,
19691 IX86_BUILTIN_DIVPD,
19692 IX86_BUILTIN_DIVSD,
19693 IX86_BUILTIN_MULPD,
19694 IX86_BUILTIN_MULSD,
19695 IX86_BUILTIN_SUBPD,
19696 IX86_BUILTIN_SUBSD,
19698 IX86_BUILTIN_CMPEQPD,
19699 IX86_BUILTIN_CMPLTPD,
19700 IX86_BUILTIN_CMPLEPD,
19701 IX86_BUILTIN_CMPGTPD,
19702 IX86_BUILTIN_CMPGEPD,
19703 IX86_BUILTIN_CMPNEQPD,
19704 IX86_BUILTIN_CMPNLTPD,
19705 IX86_BUILTIN_CMPNLEPD,
19706 IX86_BUILTIN_CMPNGTPD,
19707 IX86_BUILTIN_CMPNGEPD,
19708 IX86_BUILTIN_CMPORDPD,
19709 IX86_BUILTIN_CMPUNORDPD,
19710 IX86_BUILTIN_CMPEQSD,
19711 IX86_BUILTIN_CMPLTSD,
19712 IX86_BUILTIN_CMPLESD,
19713 IX86_BUILTIN_CMPNEQSD,
19714 IX86_BUILTIN_CMPNLTSD,
19715 IX86_BUILTIN_CMPNLESD,
19716 IX86_BUILTIN_CMPORDSD,
19717 IX86_BUILTIN_CMPUNORDSD,
19719 IX86_BUILTIN_COMIEQSD,
19720 IX86_BUILTIN_COMILTSD,
19721 IX86_BUILTIN_COMILESD,
19722 IX86_BUILTIN_COMIGTSD,
19723 IX86_BUILTIN_COMIGESD,
19724 IX86_BUILTIN_COMINEQSD,
19725 IX86_BUILTIN_UCOMIEQSD,
19726 IX86_BUILTIN_UCOMILTSD,
19727 IX86_BUILTIN_UCOMILESD,
19728 IX86_BUILTIN_UCOMIGTSD,
19729 IX86_BUILTIN_UCOMIGESD,
19730 IX86_BUILTIN_UCOMINEQSD,
19732 IX86_BUILTIN_MAXPD,
19733 IX86_BUILTIN_MAXSD,
19734 IX86_BUILTIN_MINPD,
19735 IX86_BUILTIN_MINSD,
19737 IX86_BUILTIN_ANDPD,
19738 IX86_BUILTIN_ANDNPD,
19740 IX86_BUILTIN_XORPD,
19742 IX86_BUILTIN_SQRTPD,
19743 IX86_BUILTIN_SQRTSD,
19745 IX86_BUILTIN_UNPCKHPD,
19746 IX86_BUILTIN_UNPCKLPD,
19748 IX86_BUILTIN_SHUFPD,
19750 IX86_BUILTIN_LOADUPD,
19751 IX86_BUILTIN_STOREUPD,
19752 IX86_BUILTIN_MOVSD,
19754 IX86_BUILTIN_LOADHPD,
19755 IX86_BUILTIN_LOADLPD,
19757 IX86_BUILTIN_CVTDQ2PD,
19758 IX86_BUILTIN_CVTDQ2PS,
19760 IX86_BUILTIN_CVTPD2DQ,
19761 IX86_BUILTIN_CVTPD2PI,
19762 IX86_BUILTIN_CVTPD2PS,
19763 IX86_BUILTIN_CVTTPD2DQ,
19764 IX86_BUILTIN_CVTTPD2PI,
19766 IX86_BUILTIN_CVTPI2PD,
19767 IX86_BUILTIN_CVTSI2SD,
19768 IX86_BUILTIN_CVTSI642SD,
19770 IX86_BUILTIN_CVTSD2SI,
19771 IX86_BUILTIN_CVTSD2SI64,
19772 IX86_BUILTIN_CVTSD2SS,
19773 IX86_BUILTIN_CVTSS2SD,
19774 IX86_BUILTIN_CVTTSD2SI,
19775 IX86_BUILTIN_CVTTSD2SI64,
19777 IX86_BUILTIN_CVTPS2DQ,
19778 IX86_BUILTIN_CVTPS2PD,
19779 IX86_BUILTIN_CVTTPS2DQ,
19781 IX86_BUILTIN_MOVNTI,
19782 IX86_BUILTIN_MOVNTPD,
19783 IX86_BUILTIN_MOVNTDQ,
19785 IX86_BUILTIN_MOVQ128,
19788 IX86_BUILTIN_MASKMOVDQU,
19789 IX86_BUILTIN_MOVMSKPD,
19790 IX86_BUILTIN_PMOVMSKB128,
19792 IX86_BUILTIN_PACKSSWB128,
19793 IX86_BUILTIN_PACKSSDW128,
19794 IX86_BUILTIN_PACKUSWB128,
19796 IX86_BUILTIN_PADDB128,
19797 IX86_BUILTIN_PADDW128,
19798 IX86_BUILTIN_PADDD128,
19799 IX86_BUILTIN_PADDQ128,
19800 IX86_BUILTIN_PADDSB128,
19801 IX86_BUILTIN_PADDSW128,
19802 IX86_BUILTIN_PADDUSB128,
19803 IX86_BUILTIN_PADDUSW128,
19804 IX86_BUILTIN_PSUBB128,
19805 IX86_BUILTIN_PSUBW128,
19806 IX86_BUILTIN_PSUBD128,
19807 IX86_BUILTIN_PSUBQ128,
19808 IX86_BUILTIN_PSUBSB128,
19809 IX86_BUILTIN_PSUBSW128,
19810 IX86_BUILTIN_PSUBUSB128,
19811 IX86_BUILTIN_PSUBUSW128,
19813 IX86_BUILTIN_PAND128,
19814 IX86_BUILTIN_PANDN128,
19815 IX86_BUILTIN_POR128,
19816 IX86_BUILTIN_PXOR128,
19818 IX86_BUILTIN_PAVGB128,
19819 IX86_BUILTIN_PAVGW128,
19821 IX86_BUILTIN_PCMPEQB128,
19822 IX86_BUILTIN_PCMPEQW128,
19823 IX86_BUILTIN_PCMPEQD128,
19824 IX86_BUILTIN_PCMPGTB128,
19825 IX86_BUILTIN_PCMPGTW128,
19826 IX86_BUILTIN_PCMPGTD128,
19828 IX86_BUILTIN_PMADDWD128,
19830 IX86_BUILTIN_PMAXSW128,
19831 IX86_BUILTIN_PMAXUB128,
19832 IX86_BUILTIN_PMINSW128,
19833 IX86_BUILTIN_PMINUB128,
19835 IX86_BUILTIN_PMULUDQ,
19836 IX86_BUILTIN_PMULUDQ128,
19837 IX86_BUILTIN_PMULHUW128,
19838 IX86_BUILTIN_PMULHW128,
19839 IX86_BUILTIN_PMULLW128,
19841 IX86_BUILTIN_PSADBW128,
19842 IX86_BUILTIN_PSHUFHW,
19843 IX86_BUILTIN_PSHUFLW,
19844 IX86_BUILTIN_PSHUFD,
19846 IX86_BUILTIN_PSLLDQI128,
19847 IX86_BUILTIN_PSLLWI128,
19848 IX86_BUILTIN_PSLLDI128,
19849 IX86_BUILTIN_PSLLQI128,
19850 IX86_BUILTIN_PSRAWI128,
19851 IX86_BUILTIN_PSRADI128,
19852 IX86_BUILTIN_PSRLDQI128,
19853 IX86_BUILTIN_PSRLWI128,
19854 IX86_BUILTIN_PSRLDI128,
19855 IX86_BUILTIN_PSRLQI128,
19857 IX86_BUILTIN_PSLLDQ128,
19858 IX86_BUILTIN_PSLLW128,
19859 IX86_BUILTIN_PSLLD128,
19860 IX86_BUILTIN_PSLLQ128,
19861 IX86_BUILTIN_PSRAW128,
19862 IX86_BUILTIN_PSRAD128,
19863 IX86_BUILTIN_PSRLW128,
19864 IX86_BUILTIN_PSRLD128,
19865 IX86_BUILTIN_PSRLQ128,
19867 IX86_BUILTIN_PUNPCKHBW128,
19868 IX86_BUILTIN_PUNPCKHWD128,
19869 IX86_BUILTIN_PUNPCKHDQ128,
19870 IX86_BUILTIN_PUNPCKHQDQ128,
19871 IX86_BUILTIN_PUNPCKLBW128,
19872 IX86_BUILTIN_PUNPCKLWD128,
19873 IX86_BUILTIN_PUNPCKLDQ128,
19874 IX86_BUILTIN_PUNPCKLQDQ128,
19876 IX86_BUILTIN_CLFLUSH,
19877 IX86_BUILTIN_MFENCE,
19878 IX86_BUILTIN_LFENCE,
19881 IX86_BUILTIN_ADDSUBPS,
19882 IX86_BUILTIN_HADDPS,
19883 IX86_BUILTIN_HSUBPS,
19884 IX86_BUILTIN_MOVSHDUP,
19885 IX86_BUILTIN_MOVSLDUP,
19886 IX86_BUILTIN_ADDSUBPD,
19887 IX86_BUILTIN_HADDPD,
19888 IX86_BUILTIN_HSUBPD,
19889 IX86_BUILTIN_LDDQU,
19891 IX86_BUILTIN_MONITOR,
19892 IX86_BUILTIN_MWAIT,
19895 IX86_BUILTIN_PHADDW,
19896 IX86_BUILTIN_PHADDD,
19897 IX86_BUILTIN_PHADDSW,
19898 IX86_BUILTIN_PHSUBW,
19899 IX86_BUILTIN_PHSUBD,
19900 IX86_BUILTIN_PHSUBSW,
19901 IX86_BUILTIN_PMADDUBSW,
19902 IX86_BUILTIN_PMULHRSW,
19903 IX86_BUILTIN_PSHUFB,
19904 IX86_BUILTIN_PSIGNB,
19905 IX86_BUILTIN_PSIGNW,
19906 IX86_BUILTIN_PSIGND,
19907 IX86_BUILTIN_PALIGNR,
19908 IX86_BUILTIN_PABSB,
19909 IX86_BUILTIN_PABSW,
19910 IX86_BUILTIN_PABSD,
19912 IX86_BUILTIN_PHADDW128,
19913 IX86_BUILTIN_PHADDD128,
19914 IX86_BUILTIN_PHADDSW128,
19915 IX86_BUILTIN_PHSUBW128,
19916 IX86_BUILTIN_PHSUBD128,
19917 IX86_BUILTIN_PHSUBSW128,
19918 IX86_BUILTIN_PMADDUBSW128,
19919 IX86_BUILTIN_PMULHRSW128,
19920 IX86_BUILTIN_PSHUFB128,
19921 IX86_BUILTIN_PSIGNB128,
19922 IX86_BUILTIN_PSIGNW128,
19923 IX86_BUILTIN_PSIGND128,
19924 IX86_BUILTIN_PALIGNR128,
19925 IX86_BUILTIN_PABSB128,
19926 IX86_BUILTIN_PABSW128,
19927 IX86_BUILTIN_PABSD128,
19929 /* AMDFAM10 - SSE4A New Instructions. */
19930 IX86_BUILTIN_MOVNTSD,
19931 IX86_BUILTIN_MOVNTSS,
19932 IX86_BUILTIN_EXTRQI,
19933 IX86_BUILTIN_EXTRQ,
19934 IX86_BUILTIN_INSERTQI,
19935 IX86_BUILTIN_INSERTQ,
19938 IX86_BUILTIN_BLENDPD,
19939 IX86_BUILTIN_BLENDPS,
19940 IX86_BUILTIN_BLENDVPD,
19941 IX86_BUILTIN_BLENDVPS,
19942 IX86_BUILTIN_PBLENDVB128,
19943 IX86_BUILTIN_PBLENDW128,
19948 IX86_BUILTIN_INSERTPS128,
19950 IX86_BUILTIN_MOVNTDQA,
19951 IX86_BUILTIN_MPSADBW128,
19952 IX86_BUILTIN_PACKUSDW128,
19953 IX86_BUILTIN_PCMPEQQ,
19954 IX86_BUILTIN_PHMINPOSUW128,
19956 IX86_BUILTIN_PMAXSB128,
19957 IX86_BUILTIN_PMAXSD128,
19958 IX86_BUILTIN_PMAXUD128,
19959 IX86_BUILTIN_PMAXUW128,
19961 IX86_BUILTIN_PMINSB128,
19962 IX86_BUILTIN_PMINSD128,
19963 IX86_BUILTIN_PMINUD128,
19964 IX86_BUILTIN_PMINUW128,
19966 IX86_BUILTIN_PMOVSXBW128,
19967 IX86_BUILTIN_PMOVSXBD128,
19968 IX86_BUILTIN_PMOVSXBQ128,
19969 IX86_BUILTIN_PMOVSXWD128,
19970 IX86_BUILTIN_PMOVSXWQ128,
19971 IX86_BUILTIN_PMOVSXDQ128,
19973 IX86_BUILTIN_PMOVZXBW128,
19974 IX86_BUILTIN_PMOVZXBD128,
19975 IX86_BUILTIN_PMOVZXBQ128,
19976 IX86_BUILTIN_PMOVZXWD128,
19977 IX86_BUILTIN_PMOVZXWQ128,
19978 IX86_BUILTIN_PMOVZXDQ128,
19980 IX86_BUILTIN_PMULDQ128,
19981 IX86_BUILTIN_PMULLD128,
19983 IX86_BUILTIN_ROUNDPD,
19984 IX86_BUILTIN_ROUNDPS,
19985 IX86_BUILTIN_ROUNDSD,
19986 IX86_BUILTIN_ROUNDSS,
19988 IX86_BUILTIN_PTESTZ,
19989 IX86_BUILTIN_PTESTC,
19990 IX86_BUILTIN_PTESTNZC,
19992 IX86_BUILTIN_VEC_INIT_V2SI,
19993 IX86_BUILTIN_VEC_INIT_V4HI,
19994 IX86_BUILTIN_VEC_INIT_V8QI,
19995 IX86_BUILTIN_VEC_EXT_V2DF,
19996 IX86_BUILTIN_VEC_EXT_V2DI,
19997 IX86_BUILTIN_VEC_EXT_V4SF,
19998 IX86_BUILTIN_VEC_EXT_V4SI,
19999 IX86_BUILTIN_VEC_EXT_V8HI,
20000 IX86_BUILTIN_VEC_EXT_V2SI,
20001 IX86_BUILTIN_VEC_EXT_V4HI,
20002 IX86_BUILTIN_VEC_EXT_V16QI,
20003 IX86_BUILTIN_VEC_SET_V2DI,
20004 IX86_BUILTIN_VEC_SET_V4SF,
20005 IX86_BUILTIN_VEC_SET_V4SI,
20006 IX86_BUILTIN_VEC_SET_V8HI,
20007 IX86_BUILTIN_VEC_SET_V4HI,
20008 IX86_BUILTIN_VEC_SET_V16QI,
20010 IX86_BUILTIN_VEC_PACK_SFIX,
20013 IX86_BUILTIN_CRC32QI,
20014 IX86_BUILTIN_CRC32HI,
20015 IX86_BUILTIN_CRC32SI,
20016 IX86_BUILTIN_CRC32DI,
20018 IX86_BUILTIN_PCMPESTRI128,
20019 IX86_BUILTIN_PCMPESTRM128,
20020 IX86_BUILTIN_PCMPESTRA128,
20021 IX86_BUILTIN_PCMPESTRC128,
20022 IX86_BUILTIN_PCMPESTRO128,
20023 IX86_BUILTIN_PCMPESTRS128,
20024 IX86_BUILTIN_PCMPESTRZ128,
20025 IX86_BUILTIN_PCMPISTRI128,
20026 IX86_BUILTIN_PCMPISTRM128,
20027 IX86_BUILTIN_PCMPISTRA128,
20028 IX86_BUILTIN_PCMPISTRC128,
20029 IX86_BUILTIN_PCMPISTRO128,
20030 IX86_BUILTIN_PCMPISTRS128,
20031 IX86_BUILTIN_PCMPISTRZ128,
20033 IX86_BUILTIN_PCMPGTQ,
20035 /* AES instructions */
20036 IX86_BUILTIN_AESENC128,
20037 IX86_BUILTIN_AESENCLAST128,
20038 IX86_BUILTIN_AESDEC128,
20039 IX86_BUILTIN_AESDECLAST128,
20040 IX86_BUILTIN_AESIMC128,
20041 IX86_BUILTIN_AESKEYGENASSIST128,
20043 /* PCLMUL instruction */
20044 IX86_BUILTIN_PCLMULQDQ128,
20047 IX86_BUILTIN_ADDPD256,
20048 IX86_BUILTIN_ADDPS256,
20049 IX86_BUILTIN_ADDSUBPD256,
20050 IX86_BUILTIN_ADDSUBPS256,
20051 IX86_BUILTIN_ANDPD256,
20052 IX86_BUILTIN_ANDPS256,
20053 IX86_BUILTIN_ANDNPD256,
20054 IX86_BUILTIN_ANDNPS256,
20055 IX86_BUILTIN_BLENDPD256,
20056 IX86_BUILTIN_BLENDPS256,
20057 IX86_BUILTIN_BLENDVPD256,
20058 IX86_BUILTIN_BLENDVPS256,
20059 IX86_BUILTIN_DIVPD256,
20060 IX86_BUILTIN_DIVPS256,
20061 IX86_BUILTIN_DPPS256,
20062 IX86_BUILTIN_HADDPD256,
20063 IX86_BUILTIN_HADDPS256,
20064 IX86_BUILTIN_HSUBPD256,
20065 IX86_BUILTIN_HSUBPS256,
20066 IX86_BUILTIN_MAXPD256,
20067 IX86_BUILTIN_MAXPS256,
20068 IX86_BUILTIN_MINPD256,
20069 IX86_BUILTIN_MINPS256,
20070 IX86_BUILTIN_MULPD256,
20071 IX86_BUILTIN_MULPS256,
20072 IX86_BUILTIN_ORPD256,
20073 IX86_BUILTIN_ORPS256,
20074 IX86_BUILTIN_SHUFPD256,
20075 IX86_BUILTIN_SHUFPS256,
20076 IX86_BUILTIN_SUBPD256,
20077 IX86_BUILTIN_SUBPS256,
20078 IX86_BUILTIN_XORPD256,
20079 IX86_BUILTIN_XORPS256,
20080 IX86_BUILTIN_CMPSD,
20081 IX86_BUILTIN_CMPSS,
20082 IX86_BUILTIN_CMPPD,
20083 IX86_BUILTIN_CMPPS,
20084 IX86_BUILTIN_CMPPD256,
20085 IX86_BUILTIN_CMPPS256,
20086 IX86_BUILTIN_CVTDQ2PD256,
20087 IX86_BUILTIN_CVTDQ2PS256,
20088 IX86_BUILTIN_CVTPD2PS256,
20089 IX86_BUILTIN_CVTPS2DQ256,
20090 IX86_BUILTIN_CVTPS2PD256,
20091 IX86_BUILTIN_CVTTPD2DQ256,
20092 IX86_BUILTIN_CVTPD2DQ256,
20093 IX86_BUILTIN_CVTTPS2DQ256,
20094 IX86_BUILTIN_EXTRACTF128PD256,
20095 IX86_BUILTIN_EXTRACTF128PS256,
20096 IX86_BUILTIN_EXTRACTF128SI256,
20097 IX86_BUILTIN_VZEROALL,
20098 IX86_BUILTIN_VZEROUPPER,
20099 IX86_BUILTIN_VZEROUPPER_REX64,
20100 IX86_BUILTIN_VPERMILVARPD,
20101 IX86_BUILTIN_VPERMILVARPS,
20102 IX86_BUILTIN_VPERMILVARPD256,
20103 IX86_BUILTIN_VPERMILVARPS256,
20104 IX86_BUILTIN_VPERMILPD,
20105 IX86_BUILTIN_VPERMILPS,
20106 IX86_BUILTIN_VPERMILPD256,
20107 IX86_BUILTIN_VPERMILPS256,
20108 IX86_BUILTIN_VPERM2F128PD256,
20109 IX86_BUILTIN_VPERM2F128PS256,
20110 IX86_BUILTIN_VPERM2F128SI256,
20111 IX86_BUILTIN_VBROADCASTSS,
20112 IX86_BUILTIN_VBROADCASTSD256,
20113 IX86_BUILTIN_VBROADCASTSS256,
20114 IX86_BUILTIN_VBROADCASTPD256,
20115 IX86_BUILTIN_VBROADCASTPS256,
20116 IX86_BUILTIN_VINSERTF128PD256,
20117 IX86_BUILTIN_VINSERTF128PS256,
20118 IX86_BUILTIN_VINSERTF128SI256,
20119 IX86_BUILTIN_LOADUPD256,
20120 IX86_BUILTIN_LOADUPS256,
20121 IX86_BUILTIN_STOREUPD256,
20122 IX86_BUILTIN_STOREUPS256,
20123 IX86_BUILTIN_LDDQU256,
20124 IX86_BUILTIN_MOVNTDQ256,
20125 IX86_BUILTIN_MOVNTPD256,
20126 IX86_BUILTIN_MOVNTPS256,
20127 IX86_BUILTIN_LOADDQU256,
20128 IX86_BUILTIN_STOREDQU256,
20129 IX86_BUILTIN_MASKLOADPD,
20130 IX86_BUILTIN_MASKLOADPS,
20131 IX86_BUILTIN_MASKSTOREPD,
20132 IX86_BUILTIN_MASKSTOREPS,
20133 IX86_BUILTIN_MASKLOADPD256,
20134 IX86_BUILTIN_MASKLOADPS256,
20135 IX86_BUILTIN_MASKSTOREPD256,
20136 IX86_BUILTIN_MASKSTOREPS256,
20137 IX86_BUILTIN_MOVSHDUP256,
20138 IX86_BUILTIN_MOVSLDUP256,
20139 IX86_BUILTIN_MOVDDUP256,
20141 IX86_BUILTIN_SQRTPD256,
20142 IX86_BUILTIN_SQRTPS256,
20143 IX86_BUILTIN_SQRTPS_NR256,
20144 IX86_BUILTIN_RSQRTPS256,
20145 IX86_BUILTIN_RSQRTPS_NR256,
20147 IX86_BUILTIN_RCPPS256,
20149 IX86_BUILTIN_ROUNDPD256,
20150 IX86_BUILTIN_ROUNDPS256,
20152 IX86_BUILTIN_UNPCKHPD256,
20153 IX86_BUILTIN_UNPCKLPD256,
20154 IX86_BUILTIN_UNPCKHPS256,
20155 IX86_BUILTIN_UNPCKLPS256,
20157 IX86_BUILTIN_SI256_SI,
20158 IX86_BUILTIN_PS256_PS,
20159 IX86_BUILTIN_PD256_PD,
20160 IX86_BUILTIN_SI_SI256,
20161 IX86_BUILTIN_PS_PS256,
20162 IX86_BUILTIN_PD_PD256,
20164 IX86_BUILTIN_VTESTZPD,
20165 IX86_BUILTIN_VTESTCPD,
20166 IX86_BUILTIN_VTESTNZCPD,
20167 IX86_BUILTIN_VTESTZPS,
20168 IX86_BUILTIN_VTESTCPS,
20169 IX86_BUILTIN_VTESTNZCPS,
20170 IX86_BUILTIN_VTESTZPD256,
20171 IX86_BUILTIN_VTESTCPD256,
20172 IX86_BUILTIN_VTESTNZCPD256,
20173 IX86_BUILTIN_VTESTZPS256,
20174 IX86_BUILTIN_VTESTCPS256,
20175 IX86_BUILTIN_VTESTNZCPS256,
20176 IX86_BUILTIN_PTESTZ256,
20177 IX86_BUILTIN_PTESTC256,
20178 IX86_BUILTIN_PTESTNZC256,
20180 IX86_BUILTIN_MOVMSKPD256,
20181 IX86_BUILTIN_MOVMSKPS256,
20183 /* TFmode support builtins. */
20185 IX86_BUILTIN_FABSQ,
20186 IX86_BUILTIN_COPYSIGNQ,
20188 /* SSE5 instructions */
20189 IX86_BUILTIN_FMADDSS,
20190 IX86_BUILTIN_FMADDSD,
20191 IX86_BUILTIN_FMADDPS,
20192 IX86_BUILTIN_FMADDPD,
20193 IX86_BUILTIN_FMSUBSS,
20194 IX86_BUILTIN_FMSUBSD,
20195 IX86_BUILTIN_FMSUBPS,
20196 IX86_BUILTIN_FMSUBPD,
20197 IX86_BUILTIN_FNMADDSS,
20198 IX86_BUILTIN_FNMADDSD,
20199 IX86_BUILTIN_FNMADDPS,
20200 IX86_BUILTIN_FNMADDPD,
20201 IX86_BUILTIN_FNMSUBSS,
20202 IX86_BUILTIN_FNMSUBSD,
20203 IX86_BUILTIN_FNMSUBPS,
20204 IX86_BUILTIN_FNMSUBPD,
20205 IX86_BUILTIN_PCMOV,
20206 IX86_BUILTIN_PCMOV_V2DI,
20207 IX86_BUILTIN_PCMOV_V4SI,
20208 IX86_BUILTIN_PCMOV_V8HI,
20209 IX86_BUILTIN_PCMOV_V16QI,
20210 IX86_BUILTIN_PCMOV_V4SF,
20211 IX86_BUILTIN_PCMOV_V2DF,
20212 IX86_BUILTIN_PPERM,
20213 IX86_BUILTIN_PERMPS,
20214 IX86_BUILTIN_PERMPD,
20215 IX86_BUILTIN_PMACSSWW,
20216 IX86_BUILTIN_PMACSWW,
20217 IX86_BUILTIN_PMACSSWD,
20218 IX86_BUILTIN_PMACSWD,
20219 IX86_BUILTIN_PMACSSDD,
20220 IX86_BUILTIN_PMACSDD,
20221 IX86_BUILTIN_PMACSSDQL,
20222 IX86_BUILTIN_PMACSSDQH,
20223 IX86_BUILTIN_PMACSDQL,
20224 IX86_BUILTIN_PMACSDQH,
20225 IX86_BUILTIN_PMADCSSWD,
20226 IX86_BUILTIN_PMADCSWD,
20227 IX86_BUILTIN_PHADDBW,
20228 IX86_BUILTIN_PHADDBD,
20229 IX86_BUILTIN_PHADDBQ,
20230 IX86_BUILTIN_PHADDWD,
20231 IX86_BUILTIN_PHADDWQ,
20232 IX86_BUILTIN_PHADDDQ,
20233 IX86_BUILTIN_PHADDUBW,
20234 IX86_BUILTIN_PHADDUBD,
20235 IX86_BUILTIN_PHADDUBQ,
20236 IX86_BUILTIN_PHADDUWD,
20237 IX86_BUILTIN_PHADDUWQ,
20238 IX86_BUILTIN_PHADDUDQ,
20239 IX86_BUILTIN_PHSUBBW,
20240 IX86_BUILTIN_PHSUBWD,
20241 IX86_BUILTIN_PHSUBDQ,
20242 IX86_BUILTIN_PROTB,
20243 IX86_BUILTIN_PROTW,
20244 IX86_BUILTIN_PROTD,
20245 IX86_BUILTIN_PROTQ,
20246 IX86_BUILTIN_PROTB_IMM,
20247 IX86_BUILTIN_PROTW_IMM,
20248 IX86_BUILTIN_PROTD_IMM,
20249 IX86_BUILTIN_PROTQ_IMM,
20250 IX86_BUILTIN_PSHLB,
20251 IX86_BUILTIN_PSHLW,
20252 IX86_BUILTIN_PSHLD,
20253 IX86_BUILTIN_PSHLQ,
20254 IX86_BUILTIN_PSHAB,
20255 IX86_BUILTIN_PSHAW,
20256 IX86_BUILTIN_PSHAD,
20257 IX86_BUILTIN_PSHAQ,
20258 IX86_BUILTIN_FRCZSS,
20259 IX86_BUILTIN_FRCZSD,
20260 IX86_BUILTIN_FRCZPS,
20261 IX86_BUILTIN_FRCZPD,
20262 IX86_BUILTIN_CVTPH2PS,
20263 IX86_BUILTIN_CVTPS2PH,
20265 IX86_BUILTIN_COMEQSS,
20266 IX86_BUILTIN_COMNESS,
20267 IX86_BUILTIN_COMLTSS,
20268 IX86_BUILTIN_COMLESS,
20269 IX86_BUILTIN_COMGTSS,
20270 IX86_BUILTIN_COMGESS,
20271 IX86_BUILTIN_COMUEQSS,
20272 IX86_BUILTIN_COMUNESS,
20273 IX86_BUILTIN_COMULTSS,
20274 IX86_BUILTIN_COMULESS,
20275 IX86_BUILTIN_COMUGTSS,
20276 IX86_BUILTIN_COMUGESS,
20277 IX86_BUILTIN_COMORDSS,
20278 IX86_BUILTIN_COMUNORDSS,
20279 IX86_BUILTIN_COMFALSESS,
20280 IX86_BUILTIN_COMTRUESS,
20282 IX86_BUILTIN_COMEQSD,
20283 IX86_BUILTIN_COMNESD,
20284 IX86_BUILTIN_COMLTSD,
20285 IX86_BUILTIN_COMLESD,
20286 IX86_BUILTIN_COMGTSD,
20287 IX86_BUILTIN_COMGESD,
20288 IX86_BUILTIN_COMUEQSD,
20289 IX86_BUILTIN_COMUNESD,
20290 IX86_BUILTIN_COMULTSD,
20291 IX86_BUILTIN_COMULESD,
20292 IX86_BUILTIN_COMUGTSD,
20293 IX86_BUILTIN_COMUGESD,
20294 IX86_BUILTIN_COMORDSD,
20295 IX86_BUILTIN_COMUNORDSD,
20296 IX86_BUILTIN_COMFALSESD,
20297 IX86_BUILTIN_COMTRUESD,
20299 IX86_BUILTIN_COMEQPS,
20300 IX86_BUILTIN_COMNEPS,
20301 IX86_BUILTIN_COMLTPS,
20302 IX86_BUILTIN_COMLEPS,
20303 IX86_BUILTIN_COMGTPS,
20304 IX86_BUILTIN_COMGEPS,
20305 IX86_BUILTIN_COMUEQPS,
20306 IX86_BUILTIN_COMUNEPS,
20307 IX86_BUILTIN_COMULTPS,
20308 IX86_BUILTIN_COMULEPS,
20309 IX86_BUILTIN_COMUGTPS,
20310 IX86_BUILTIN_COMUGEPS,
20311 IX86_BUILTIN_COMORDPS,
20312 IX86_BUILTIN_COMUNORDPS,
20313 IX86_BUILTIN_COMFALSEPS,
20314 IX86_BUILTIN_COMTRUEPS,
20316 IX86_BUILTIN_COMEQPD,
20317 IX86_BUILTIN_COMNEPD,
20318 IX86_BUILTIN_COMLTPD,
20319 IX86_BUILTIN_COMLEPD,
20320 IX86_BUILTIN_COMGTPD,
20321 IX86_BUILTIN_COMGEPD,
20322 IX86_BUILTIN_COMUEQPD,
20323 IX86_BUILTIN_COMUNEPD,
20324 IX86_BUILTIN_COMULTPD,
20325 IX86_BUILTIN_COMULEPD,
20326 IX86_BUILTIN_COMUGTPD,
20327 IX86_BUILTIN_COMUGEPD,
20328 IX86_BUILTIN_COMORDPD,
20329 IX86_BUILTIN_COMUNORDPD,
20330 IX86_BUILTIN_COMFALSEPD,
20331 IX86_BUILTIN_COMTRUEPD,
20333 IX86_BUILTIN_PCOMEQUB,
20334 IX86_BUILTIN_PCOMNEUB,
20335 IX86_BUILTIN_PCOMLTUB,
20336 IX86_BUILTIN_PCOMLEUB,
20337 IX86_BUILTIN_PCOMGTUB,
20338 IX86_BUILTIN_PCOMGEUB,
20339 IX86_BUILTIN_PCOMFALSEUB,
20340 IX86_BUILTIN_PCOMTRUEUB,
20341 IX86_BUILTIN_PCOMEQUW,
20342 IX86_BUILTIN_PCOMNEUW,
20343 IX86_BUILTIN_PCOMLTUW,
20344 IX86_BUILTIN_PCOMLEUW,
20345 IX86_BUILTIN_PCOMGTUW,
20346 IX86_BUILTIN_PCOMGEUW,
20347 IX86_BUILTIN_PCOMFALSEUW,
20348 IX86_BUILTIN_PCOMTRUEUW,
20349 IX86_BUILTIN_PCOMEQUD,
20350 IX86_BUILTIN_PCOMNEUD,
20351 IX86_BUILTIN_PCOMLTUD,
20352 IX86_BUILTIN_PCOMLEUD,
20353 IX86_BUILTIN_PCOMGTUD,
20354 IX86_BUILTIN_PCOMGEUD,
20355 IX86_BUILTIN_PCOMFALSEUD,
20356 IX86_BUILTIN_PCOMTRUEUD,
20357 IX86_BUILTIN_PCOMEQUQ,
20358 IX86_BUILTIN_PCOMNEUQ,
20359 IX86_BUILTIN_PCOMLTUQ,
20360 IX86_BUILTIN_PCOMLEUQ,
20361 IX86_BUILTIN_PCOMGTUQ,
20362 IX86_BUILTIN_PCOMGEUQ,
20363 IX86_BUILTIN_PCOMFALSEUQ,
20364 IX86_BUILTIN_PCOMTRUEUQ,
20366 IX86_BUILTIN_PCOMEQB,
20367 IX86_BUILTIN_PCOMNEB,
20368 IX86_BUILTIN_PCOMLTB,
20369 IX86_BUILTIN_PCOMLEB,
20370 IX86_BUILTIN_PCOMGTB,
20371 IX86_BUILTIN_PCOMGEB,
20372 IX86_BUILTIN_PCOMFALSEB,
20373 IX86_BUILTIN_PCOMTRUEB,
20374 IX86_BUILTIN_PCOMEQW,
20375 IX86_BUILTIN_PCOMNEW,
20376 IX86_BUILTIN_PCOMLTW,
20377 IX86_BUILTIN_PCOMLEW,
20378 IX86_BUILTIN_PCOMGTW,
20379 IX86_BUILTIN_PCOMGEW,
20380 IX86_BUILTIN_PCOMFALSEW,
20381 IX86_BUILTIN_PCOMTRUEW,
20382 IX86_BUILTIN_PCOMEQD,
20383 IX86_BUILTIN_PCOMNED,
20384 IX86_BUILTIN_PCOMLTD,
20385 IX86_BUILTIN_PCOMLED,
20386 IX86_BUILTIN_PCOMGTD,
20387 IX86_BUILTIN_PCOMGED,
20388 IX86_BUILTIN_PCOMFALSED,
20389 IX86_BUILTIN_PCOMTRUED,
20390 IX86_BUILTIN_PCOMEQQ,
20391 IX86_BUILTIN_PCOMNEQ,
20392 IX86_BUILTIN_PCOMLTQ,
20393 IX86_BUILTIN_PCOMLEQ,
20394 IX86_BUILTIN_PCOMGTQ,
20395 IX86_BUILTIN_PCOMGEQ,
20396 IX86_BUILTIN_PCOMFALSEQ,
20397 IX86_BUILTIN_PCOMTRUEQ,
20402 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; NULL_TREE for builtins whose creation was
   deferred (see def_builtin below).  GTY(()) marks it as a GC root.  */
20403 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20405 /* Table of all of the builtin functions that are possible with different ISA's
20406    but are waiting to be built until a function is declared to use that
/* NOTE(review): the comment tail, opening brace and closing `};` of this
   struct appear to be missing from this extract; code text left untouched.  */
20408 struct builtin_isa GTY(())
20410   tree type;			/* builtin type to use in the declaration */
20411   const char *name;		/* function name */
20412   int isa;			/* isa_flags this builtin is defined for */
20413   bool const_p;		/* true if the declaration is constant */
/* One deferred-builtin record per IX86_BUILTIN code; consumed by
   ix86_add_new_builtins when the relevant ISA becomes enabled.  */
20416 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20419 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20420  * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20421  * function decl in the ix86_builtins array. Returns the function decl or
20422  * NULL_TREE, if the builtin was not added.
20424  * If the front end has a special hook for builtin functions, delay adding
20425  * builtin functions that aren't in the current ISA until the ISA is changed
20426  * with function specific optimization. Doing so, can save about 300K for the
20427  * default compiler. When the builtin is expanded, check at that time whether
20430  * If the front end doesn't have a special hook, record all builtins, even if
20431  * it isn't an instruction set in the current ISA in case the user uses
20432  * function specific options for a different ISA, so that we don't get scope
20433  * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the `static tree` return-type line, braces and the final
   `return decl;` appear to be missing from this extract; the visible code
   lines are left byte-identical.  */
20436 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
/* Result decl; stays NULL_TREE when the builtin is skipped or deferred.  */
20438   tree decl = NULL_TREE;
/* Skip 64-bit-only builtins when not targeting 64-bit.  */
20440   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Always remember which ISA flags this builtin belongs to.  */
20442       ix86_builtins_isa[(int) code].isa = mask;
/* Define the builtin now if it is in the current ISA, or if the front end
   uses the ext-scope hook (in which case deferral is not needed).  */
20444       if ((mask & ix86_isa_flags) != 0
20445 	  || (lang_hooks.builtin_function
20446 	      == lang_hooks.builtin_function_ext_scope))
20449 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20451 	  ix86_builtins[(int) code] = decl;
/* A NULL type marks the ix86_builtins_isa slot as "already built".  */
20452 	  ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: record everything needed so ix86_add_new_builtins can
   create the decl later when the ISA is enabled.  */
20456 	  ix86_builtins[(int) code] = NULL_TREE;
20457 	  ix86_builtins_isa[(int) code].const_p = false;
20458 	  ix86_builtins_isa[(int) code].type = type;
20459 	  ix86_builtins_isa[(int) code].name = name;
20466 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the return-type line, braces and the `if (decl) ... else`
   structure appear to be missing from this extract; visible code untouched.  */
20469 def_builtin_const (int mask, const char *name, tree type,
20470 		   enum ix86_builtins code)
20472   tree decl = def_builtin (mask, name, type, code);
/* Decl was created now: mark it read-only ("const" function).  */
20474     TREE_READONLY (decl) = 1;
/* Decl was deferred: remember const-ness so the flag can be applied when
   ix86_add_new_builtins eventually creates the decl.  */
20476     ix86_builtins_isa[(int) code].const_p = true;
20481 /* Add any new builtin functions for a given ISA that may not have been
20482    declared. This saves a bit of space compared to adding all of the
20483    declarations to the tree, even if we didn't use them. */
/* NOTE(review): the return-type line, local declarations (`int i; tree decl;`)
   and braces appear to be missing from this extract; visible code untouched.
   ISA is the newly-enabled isa_flags mask.  */
20486 ix86_add_new_builtins (int isa)
/* Walk every builtin slot looking for deferred entries (type != NULL_TREE)
   whose recorded ISA intersects the newly enabled one.  */
20491   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20493       if ((ix86_builtins_isa[i].isa & isa) != 0
20494 	  && ix86_builtins_isa[i].type != NULL_TREE)
/* Create the decl at file (external) scope so no scope errors occur if we
   are in the middle of a function body.  */
20496 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20497 						 ix86_builtins_isa[i].type,
20498 						 i, BUILT_IN_MD, NULL,
20501 	  ix86_builtins[i] = decl;
/* Clear the type to mark this slot as built, preventing re-creation.  */
20502 	  ix86_builtins_isa[i].type = NULL_TREE;
/* Apply the const-ness recorded by def_builtin_const.  */
20503 	  if (ix86_builtins_isa[i].const_p)
20504 	    TREE_READONLY (decl) = 1;
20509 /* Bits for builtin_description.flag. */
20511 /* Set when we don't support the comparison natively, and should
20512    swap_comparison in order to support it. */
20513 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below.  NOTE(review): the
   opening brace, closing `};` and at least one trailing field appear to be
   missing from this extract — the table rows below carry six initializers
   (the last one often an (int) cast of a type or mode enum) against the five
   fields visible here.  Code text left untouched.  */
20515 struct builtin_description
20517   const unsigned int mask;		/* OPTION_MASK_ISA_* bits gating the builtin */
20518   const enum insn_code icode;		/* CODE_FOR_* insn pattern to expand to */
20519   const char *const name;		/* "__builtin_ia32_*" source-level name */
20520   const enum ix86_builtins code;	/* index into ix86_builtins[] */
20521   const enum rtx_code comparison;	/* comparison code, or UNKNOWN */
/* Scalar ordered/unordered compare builtins (SSE comiss/ucomiss and
   SSE2 comisd/ucomisd) returning an int.  Each row pairs a builtin name
   with the sse_comi/sse_ucomi insn pattern and the rtx comparison code
   used when expanding it.  NOTE(review): the opening brace and closing
   `};` appear to be missing from this extract.  */
20525 static const struct builtin_description bdesc_comi[] =
20527   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20528   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20529   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20530   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20531   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20532   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20533   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20534   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20535   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20536   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20537   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20538   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2 double-precision variants.  */
20539   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20540   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20541   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20542   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20543   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20544   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20545   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20546   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20547   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20548   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20549   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20550   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length string-compare builtins (pcmpestri/pcmpestrm).
   The last initializer selects the flags-condition mode (CCAmode etc.)
   checked by the flag-extracting variants, or 0 for the index/mask forms.
   NOTE(review): opening brace and closing `};` missing from this extract.  */
20553 static const struct builtin_description bdesc_pcmpestr[] =
20556   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20557   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20558   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20559   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20560   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20561   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20562   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length string-compare builtins (pcmpistri/pcmpistrm);
   same layout and flag-mode convention as bdesc_pcmpestr above.
   NOTE(review): opening brace and closing `};` missing from this extract.  */
20565 static const struct builtin_description bdesc_pcmpistr[] =
20568   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20569   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20570   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20571   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20572   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20573   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20574   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20577 /* Special builtin types */
/* Function-type tags for the "special" builtins (loads/stores and other
   memory-touching operations) in bdesc_special_args.  Naming convention:
   RET_FTYPE_ARG1[_ARG2...], with PC* = pointer-to-const, P* = pointer.
   NOTE(review): the enum's opening/closing braces appear to be missing
   from this extract; code text left untouched.  */
20578 enum ix86_special_builtin_type
20580   SPECIAL_FTYPE_UNKNOWN,
/* Loads: return a vector from a (const) pointer, optionally with a mask.  */
20582   V32QI_FTYPE_PCCHAR,
20583   V16QI_FTYPE_PCCHAR,
20585   V8SF_FTYPE_PCFLOAT,
20587   V4DF_FTYPE_PCDOUBLE,
20588   V4SF_FTYPE_PCFLOAT,
20589   V2DF_FTYPE_PCDOUBLE,
20590   V8SF_FTYPE_PCV8SF_V8SF,
20591   V4DF_FTYPE_PCV4DF_V4DF,
20592   V4SF_FTYPE_V4SF_PCV2SF,
20593   V4SF_FTYPE_PCV4SF_V4SF,
20594   V2DF_FTYPE_V2DF_PCDOUBLE,
20595   V2DF_FTYPE_PCV2DF_V2DF,
/* Stores: void return, destination pointer first.  */
20597   VOID_FTYPE_PV2SF_V4SF,
20598   VOID_FTYPE_PV4DI_V4DI,
20599   VOID_FTYPE_PV2DI_V2DI,
20600   VOID_FTYPE_PCHAR_V32QI,
20601   VOID_FTYPE_PCHAR_V16QI,
20602   VOID_FTYPE_PFLOAT_V8SF,
20603   VOID_FTYPE_PFLOAT_V4SF,
20604   VOID_FTYPE_PDOUBLE_V4DF,
20605   VOID_FTYPE_PDOUBLE_V2DF,
20607   VOID_FTYPE_PINT_INT,
/* Masked stores: destination pointer, mask, value.  */
20608   VOID_FTYPE_PV8SF_V8SF_V8SF,
20609   VOID_FTYPE_PV4DF_V4DF_V4DF,
20610   VOID_FTYPE_PV4SF_V4SF_V4SF,
20611   VOID_FTYPE_PV2DF_V2DF_V2DF
20614 /* Builtin types */
/* Function-type tags for the ordinary (non-memory) builtins in bdesc_args.
   Same RET_FTYPE_ARGS naming convention as ix86_special_builtin_type;
   suffixes such as _COUNT, _SWAP and _VEC_MERGE select expansion variants.
   NOTE(review): the enum braces and a run of entries (between original
   lines 20626 and 20658) appear to be missing from this extract.  */
20615 enum ix86_builtin_type
20618   FLOAT128_FTYPE_FLOAT128,
20620   FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* ptest/vtest-style builtins: two vectors in, int (flag) out.  */
20621   INT_FTYPE_V8SF_V8SF_PTEST,
20622   INT_FTYPE_V4DI_V4DI_PTEST,
20623   INT_FTYPE_V4DF_V4DF_PTEST,
20624   INT_FTYPE_V4SF_V4SF_PTEST,
20625   INT_FTYPE_V2DI_V2DI_PTEST,
20626   INT_FTYPE_V2DF_V2DF_PTEST,
20658   V4SF_FTYPE_V4SF_VEC_MERGE,
20667   V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector builtins.  */
20678   V16QI_FTYPE_V16QI_V16QI,
20679   V16QI_FTYPE_V8HI_V8HI,
20680   V8QI_FTYPE_V8QI_V8QI,
20681   V8QI_FTYPE_V4HI_V4HI,
20682   V8HI_FTYPE_V8HI_V8HI,
20683   V8HI_FTYPE_V8HI_V8HI_COUNT,
20684   V8HI_FTYPE_V16QI_V16QI,
20685   V8HI_FTYPE_V4SI_V4SI,
20686   V8HI_FTYPE_V8HI_SI_COUNT,
20687   V8SF_FTYPE_V8SF_V8SF,
20688   V8SF_FTYPE_V8SF_V8SI,
20689   V4SI_FTYPE_V4SI_V4SI,
20690   V4SI_FTYPE_V4SI_V4SI_COUNT,
20691   V4SI_FTYPE_V8HI_V8HI,
20692   V4SI_FTYPE_V4SF_V4SF,
20693   V4SI_FTYPE_V2DF_V2DF,
20694   V4SI_FTYPE_V4SI_SI_COUNT,
20695   V4HI_FTYPE_V4HI_V4HI,
20696   V4HI_FTYPE_V4HI_V4HI_COUNT,
20697   V4HI_FTYPE_V8QI_V8QI,
20698   V4HI_FTYPE_V2SI_V2SI,
20699   V4HI_FTYPE_V4HI_SI_COUNT,
20700   V4DF_FTYPE_V4DF_V4DF,
20701   V4DF_FTYPE_V4DF_V4DI,
20702   V4SF_FTYPE_V4SF_V4SF,
20703   V4SF_FTYPE_V4SF_V4SF_SWAP,
20704   V4SF_FTYPE_V4SF_V4SI,
20705   V4SF_FTYPE_V4SF_V2SI,
20706   V4SF_FTYPE_V4SF_V2DF,
20707   V4SF_FTYPE_V4SF_DI,
20708   V4SF_FTYPE_V4SF_SI,
20709   V2DI_FTYPE_V2DI_V2DI,
20710   V2DI_FTYPE_V2DI_V2DI_COUNT,
20711   V2DI_FTYPE_V16QI_V16QI,
20712   V2DI_FTYPE_V4SI_V4SI,
20713   V2DI_FTYPE_V2DI_V16QI,
20714   V2DI_FTYPE_V2DF_V2DF,
20715   V2DI_FTYPE_V2DI_SI_COUNT,
20716   V2SI_FTYPE_V2SI_V2SI,
20717   V2SI_FTYPE_V2SI_V2SI_COUNT,
20718   V2SI_FTYPE_V4HI_V4HI,
20719   V2SI_FTYPE_V2SF_V2SF,
20720   V2SI_FTYPE_V2SI_SI_COUNT,
20721   V2DF_FTYPE_V2DF_V2DF,
20722   V2DF_FTYPE_V2DF_V2DF_SWAP,
20723   V2DF_FTYPE_V2DF_V4SF,
20724   V2DF_FTYPE_V2DF_V2DI,
20725   V2DF_FTYPE_V2DF_DI,
20726   V2DF_FTYPE_V2DF_SI,
20727   V2SF_FTYPE_V2SF_V2SF,
20728   V1DI_FTYPE_V1DI_V1DI,
20729   V1DI_FTYPE_V1DI_V1DI_COUNT,
20730   V1DI_FTYPE_V8QI_V8QI,
20731   V1DI_FTYPE_V2SI_V2SI,
20732   V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar two-operand builtins (e.g. crc32).  */
20733   UINT64_FTYPE_UINT64_UINT64,
20734   UINT_FTYPE_UINT_UINT,
20735   UINT_FTYPE_UINT_USHORT,
20736   UINT_FTYPE_UINT_UCHAR,
/* Vector-with-immediate builtins.  */
20737   V8HI_FTYPE_V8HI_INT,
20738   V4SI_FTYPE_V4SI_INT,
20739   V4HI_FTYPE_V4HI_INT,
20740   V8SF_FTYPE_V8SF_INT,
20741   V4SI_FTYPE_V8SI_INT,
20742   V4SF_FTYPE_V8SF_INT,
20743   V2DF_FTYPE_V4DF_INT,
20744   V4DF_FTYPE_V4DF_INT,
20745   V4SF_FTYPE_V4SF_INT,
20746   V2DI_FTYPE_V2DI_INT,
20747   V2DI2TI_FTYPE_V2DI_INT,
20748   V2DF_FTYPE_V2DF_INT,
/* Three-operand builtins (blendv, FMA-style, etc.).  */
20749   V16QI_FTYPE_V16QI_V16QI_V16QI,
20750   V8SF_FTYPE_V8SF_V8SF_V8SF,
20751   V4DF_FTYPE_V4DF_V4DF_V4DF,
20752   V4SF_FTYPE_V4SF_V4SF_V4SF,
20753   V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus an immediate.  */
20754   V16QI_FTYPE_V16QI_V16QI_INT,
20755   V8SI_FTYPE_V8SI_V8SI_INT,
20756   V8SI_FTYPE_V8SI_V4SI_INT,
20757   V8HI_FTYPE_V8HI_V8HI_INT,
20758   V8SF_FTYPE_V8SF_V8SF_INT,
20759   V8SF_FTYPE_V8SF_V4SF_INT,
20760   V4SI_FTYPE_V4SI_V4SI_INT,
20761   V4DF_FTYPE_V4DF_V4DF_INT,
20762   V4DF_FTYPE_V4DF_V2DF_INT,
20763   V4SF_FTYPE_V4SF_V4SF_INT,
20764   V2DI_FTYPE_V2DI_V2DI_INT,
20765   V2DI2TI_FTYPE_V2DI_V2DI_INT,
20766   V1DI2DI_FTYPE_V1DI_V1DI_INT,
20767   V2DF_FTYPE_V2DF_V2DF_INT,
20768   V2DI_FTYPE_V2DI_UINT_UINT,
20769   V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20772 /* Special builtins with variable number of arguments. */
/* Memory-touching builtins (loads, stores, fences, non-temporal moves,
   masked load/store), grouped by ISA; each row's last initializer is the
   ix86_special_builtin_type tag for the builtin's signature.  A name of 0
   means no user-visible __builtin_ia32_* name is registered for the row.
   NOTE(review): the opening brace, per-group section comments and the
   closing `};` appear to be missing from this extract.  */
20773 static const struct builtin_description bdesc_special_args[] =
20776   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20779   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE.  */
20782   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20783   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20784   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20786   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20787   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20788   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20789   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20791   /* SSE or 3DNow!A */
20792   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20793   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2.  */
20796   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20797   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20798   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20799   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20800   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20801   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20802   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20803   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20804   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20806   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20807   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3.  */
20810   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1.  */
20813   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A.  */
20816   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20817   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX.  */
20820   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20821   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20822   { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20824   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20825   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20826   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20827   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20828   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20830   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20831   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20832   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20833   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20834   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20835   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20836   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20838   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
20839   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20840   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20842   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20843   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20844   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20845   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20846   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20847   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20848   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20849   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20852 /* Builtins with variable number of arguments. */
20853 static const struct builtin_description bdesc_args[] =
20856 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20857 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20858 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20859 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20863 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20865 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20866 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20872 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20873 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20878 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20881 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20882 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20883 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20884 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20885 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20887 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20888 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20889 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20890 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20891 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20892 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20894 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20895 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20896 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20898 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20900 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20901 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20902 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20903 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20904 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20905 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20907 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20908 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20909 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20910 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20911 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20912 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20914 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20920 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20921 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20922 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20923 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20925 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20926 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20927 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20928 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20929 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20930 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20931 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20932 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20933 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20934 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20935 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20936 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20937 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20938 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20939 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20942 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20943 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20944 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20945 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20946 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20947 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20950 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20951 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20952 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20953 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20954 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20955 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20956 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20957 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20958 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20961 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20965 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20966 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20967 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20970 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20971 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20972 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20986 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20987 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20988 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20989 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20990 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20992 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20997 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20998 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21002 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21004 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21005 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21015 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21017 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21023 /* SSE MMX or 3Dnow!A */
21024 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21025 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21026 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21028 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21029 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21030 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21033 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21034 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21036 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21051 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21056 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21057 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21058 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21061 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21064 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21065 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21066 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21067 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21089 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21091 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21095 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21099 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21101 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21102 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21108 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21110 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21111 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21112 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21113 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21114 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21115 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21116 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21117 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21128 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21129 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21131 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21133 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21134 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21146 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21147 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21148 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21164 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21173 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21178 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21179 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21180 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21181 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21182 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21183 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21186 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21187 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21188 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21189 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21190 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21191 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21193 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21194 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21195 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21196 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21204 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21205 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  /* NOTE(review): the ISA mask here is OPTION_MASK_ISA_SSE, but the insn
     pattern is sse2_movq128 (MOVQ xmm,xmm is an SSE2 instruction) --
     confirm whether this should be OPTION_MASK_ISA_SSE2.  */
21207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX: 64-bit (V1DI) add/sub, available with -msse2.  */
21210 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21211 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  /* SSE3 */
21214 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21215 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21217 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21218 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21219 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21220 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21221 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21222 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  /* SSSE3 */
21225 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21226 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21227 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21228 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21229 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21230 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21232 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21233 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21234 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21235 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21236 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21237 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21238 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21239 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21240 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21241 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21242 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21243 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21244 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21245 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21246 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21247 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21248 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21249 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21250 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
  /* SSE4.1 */
21262 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21263 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21264 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21265 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21266 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21268 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21269 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21270 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21271 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21273 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21274 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21275 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21276 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21289 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21290 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21291 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21294 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21295 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21296 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21297 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21298 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21300 /* SSE4.1 and SSE5 */
21301 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21302 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21303 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21304 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21306 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21307 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21308 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  /* SSE4.2 */
21311 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21312 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21313 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21314 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21315 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  /* SSE4A */
21318 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21319 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21320 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21321 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  /* AES.  A NULL (0) name means the builtin is registered explicitly
     elsewhere rather than from this table -- presumably under an AES
     option check; verify against the registration code.  */
21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21325 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  /* PCLMUL */
21333 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
  /* AVX */
21336 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21337 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21340 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21341 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21344 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21349 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21350 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21351 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21352 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21353 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21354 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21355 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21356 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21357 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21358 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21359 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21360 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21361 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21375 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21379 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21388 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21389 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21390 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21391 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21392 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21393 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21394 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21395 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21396 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21399 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21400 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21403 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21407 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21409 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21411 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21413 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21415 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21451 enum multi_arg_type {
21461 MULTI_ARG_3_PERMPS,
21462 MULTI_ARG_3_PERMPD,
21469 MULTI_ARG_2_DI_IMM,
21470 MULTI_ARG_2_SI_IMM,
21471 MULTI_ARG_2_HI_IMM,
21472 MULTI_ARG_2_QI_IMM,
21473 MULTI_ARG_2_SF_CMP,
21474 MULTI_ARG_2_DF_CMP,
21475 MULTI_ARG_2_DI_CMP,
21476 MULTI_ARG_2_SI_CMP,
21477 MULTI_ARG_2_HI_CMP,
21478 MULTI_ARG_2_QI_CMP,
21501 static const struct builtin_description bdesc_multi_arg[] =
21503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21563 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21609 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21613 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21625 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21626 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21627 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21628 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21630 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21631 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21632 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21633 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21634 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21635 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21636 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21637 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21638 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21639 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21640 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21641 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21642 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21643 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21644 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21645 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21647 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21648 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21649 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21650 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21651 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21652 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21653 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21655 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21656 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21657 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21658 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21659 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21660 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21661 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21663 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21664 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21665 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21666 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21667 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21668 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21669 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21671 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21672 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21673 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21674 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21675 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21676 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21677 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21679 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21680 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21681 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21682 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21683 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21684 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21685 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21687 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21688 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21689 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21690 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21691 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21692 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21693 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21695 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21696 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21697 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21698 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21699 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21700 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21701 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21703 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21704 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21705 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21706 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21707 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21708 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21709 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21711 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21712 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21713 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21714 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21715 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21716 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21717 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21718 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21720 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21721 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21722 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21723 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21724 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21725 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21726 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21727 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21729 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21730 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21731 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21732 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21733 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21734 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21735 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21736 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21739 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21740    in the current target ISA to allow the user to compile particular modules
21741    with different target specific options that differ from the command line
       options.  */
21744 ix86_init_mmx_sse_builtins (void)
21746 const struct builtin_description * d;
21749 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21750 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21751 tree V1DI_type_node
21752 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21753 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21754 tree V2DI_type_node
21755 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21756 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21757 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21758 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21759 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21760 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21761 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21763 tree pchar_type_node = build_pointer_type (char_type_node);
21764 tree pcchar_type_node
21765 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21766 tree pfloat_type_node = build_pointer_type (float_type_node);
21767 tree pcfloat_type_node
21768 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21769 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21770 tree pcv2sf_type_node
21771 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21772 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21773 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21776 tree int_ftype_v4sf_v4sf
21777 = build_function_type_list (integer_type_node,
21778 V4SF_type_node, V4SF_type_node, NULL_TREE);
21779 tree v4si_ftype_v4sf_v4sf
21780 = build_function_type_list (V4SI_type_node,
21781 V4SF_type_node, V4SF_type_node, NULL_TREE);
21782 /* MMX/SSE/integer conversions. */
21783 tree int_ftype_v4sf
21784 = build_function_type_list (integer_type_node,
21785 V4SF_type_node, NULL_TREE);
21786 tree int64_ftype_v4sf
21787 = build_function_type_list (long_long_integer_type_node,
21788 V4SF_type_node, NULL_TREE);
21789 tree int_ftype_v8qi
21790 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21791 tree v4sf_ftype_v4sf_int
21792 = build_function_type_list (V4SF_type_node,
21793 V4SF_type_node, integer_type_node, NULL_TREE);
21794 tree v4sf_ftype_v4sf_int64
21795 = build_function_type_list (V4SF_type_node,
21796 V4SF_type_node, long_long_integer_type_node,
21798 tree v4sf_ftype_v4sf_v2si
21799 = build_function_type_list (V4SF_type_node,
21800 V4SF_type_node, V2SI_type_node, NULL_TREE);
21802 /* Miscellaneous. */
21803 tree v8qi_ftype_v4hi_v4hi
21804 = build_function_type_list (V8QI_type_node,
21805 V4HI_type_node, V4HI_type_node, NULL_TREE);
21806 tree v4hi_ftype_v2si_v2si
21807 = build_function_type_list (V4HI_type_node,
21808 V2SI_type_node, V2SI_type_node, NULL_TREE);
21809 tree v4sf_ftype_v4sf_v4sf_int
21810 = build_function_type_list (V4SF_type_node,
21811 V4SF_type_node, V4SF_type_node,
21812 integer_type_node, NULL_TREE);
21813 tree v2si_ftype_v4hi_v4hi
21814 = build_function_type_list (V2SI_type_node,
21815 V4HI_type_node, V4HI_type_node, NULL_TREE);
21816 tree v4hi_ftype_v4hi_int
21817 = build_function_type_list (V4HI_type_node,
21818 V4HI_type_node, integer_type_node, NULL_TREE);
21819 tree v2si_ftype_v2si_int
21820 = build_function_type_list (V2SI_type_node,
21821 V2SI_type_node, integer_type_node, NULL_TREE);
21822 tree v1di_ftype_v1di_int
21823 = build_function_type_list (V1DI_type_node,
21824 V1DI_type_node, integer_type_node, NULL_TREE);
21826 tree void_ftype_void
21827 = build_function_type (void_type_node, void_list_node);
21828 tree void_ftype_unsigned
21829 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21830 tree void_ftype_unsigned_unsigned
21831 = build_function_type_list (void_type_node, unsigned_type_node,
21832 unsigned_type_node, NULL_TREE);
21833 tree void_ftype_pcvoid_unsigned_unsigned
21834 = build_function_type_list (void_type_node, const_ptr_type_node,
21835 unsigned_type_node, unsigned_type_node,
21837 tree unsigned_ftype_void
21838 = build_function_type (unsigned_type_node, void_list_node);
21839 tree v2si_ftype_v4sf
21840 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21841 /* Loads/stores. */
21842 tree void_ftype_v8qi_v8qi_pchar
21843 = build_function_type_list (void_type_node,
21844 V8QI_type_node, V8QI_type_node,
21845 pchar_type_node, NULL_TREE);
21846 tree v4sf_ftype_pcfloat
21847 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21848 tree v4sf_ftype_v4sf_pcv2sf
21849 = build_function_type_list (V4SF_type_node,
21850 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21851 tree void_ftype_pv2sf_v4sf
21852 = build_function_type_list (void_type_node,
21853 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21854 tree void_ftype_pfloat_v4sf
21855 = build_function_type_list (void_type_node,
21856 pfloat_type_node, V4SF_type_node, NULL_TREE);
21857 tree void_ftype_pdi_di
21858 = build_function_type_list (void_type_node,
21859 pdi_type_node, long_long_unsigned_type_node,
21861 tree void_ftype_pv2di_v2di
21862 = build_function_type_list (void_type_node,
21863 pv2di_type_node, V2DI_type_node, NULL_TREE);
21864 /* Normal vector unops. */
21865 tree v4sf_ftype_v4sf
21866 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21867 tree v16qi_ftype_v16qi
21868 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21869 tree v8hi_ftype_v8hi
21870 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21871 tree v4si_ftype_v4si
21872 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21873 tree v8qi_ftype_v8qi
21874 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21875 tree v4hi_ftype_v4hi
21876 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21878 /* Normal vector binops. */
21879 tree v4sf_ftype_v4sf_v4sf
21880 = build_function_type_list (V4SF_type_node,
21881 V4SF_type_node, V4SF_type_node, NULL_TREE);
21882 tree v8qi_ftype_v8qi_v8qi
21883 = build_function_type_list (V8QI_type_node,
21884 V8QI_type_node, V8QI_type_node, NULL_TREE);
21885 tree v4hi_ftype_v4hi_v4hi
21886 = build_function_type_list (V4HI_type_node,
21887 V4HI_type_node, V4HI_type_node, NULL_TREE);
21888 tree v2si_ftype_v2si_v2si
21889 = build_function_type_list (V2SI_type_node,
21890 V2SI_type_node, V2SI_type_node, NULL_TREE);
21891 tree v1di_ftype_v1di_v1di
21892 = build_function_type_list (V1DI_type_node,
21893 V1DI_type_node, V1DI_type_node, NULL_TREE);
21894 tree v1di_ftype_v1di_v1di_int
21895 = build_function_type_list (V1DI_type_node,
21896 V1DI_type_node, V1DI_type_node,
21897 integer_type_node, NULL_TREE);
21898 tree v2si_ftype_v2sf
21899 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21900 tree v2sf_ftype_v2si
21901 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21902 tree v2si_ftype_v2si
21903 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21904 tree v2sf_ftype_v2sf
21905 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21906 tree v2sf_ftype_v2sf_v2sf
21907 = build_function_type_list (V2SF_type_node,
21908 V2SF_type_node, V2SF_type_node, NULL_TREE);
21909 tree v2si_ftype_v2sf_v2sf
21910 = build_function_type_list (V2SI_type_node,
21911 V2SF_type_node, V2SF_type_node, NULL_TREE);
21912 tree pint_type_node = build_pointer_type (integer_type_node);
21913 tree pdouble_type_node = build_pointer_type (double_type_node);
21914 tree pcdouble_type_node = build_pointer_type (
21915 build_type_variant (double_type_node, 1, 0));
21916 tree int_ftype_v2df_v2df
21917 = build_function_type_list (integer_type_node,
21918 V2DF_type_node, V2DF_type_node, NULL_TREE);
21920 tree void_ftype_pcvoid
21921 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21922 tree v4sf_ftype_v4si
21923 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21924 tree v4si_ftype_v4sf
21925 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21926 tree v2df_ftype_v4si
21927 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21928 tree v4si_ftype_v2df
21929 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21930 tree v4si_ftype_v2df_v2df
21931 = build_function_type_list (V4SI_type_node,
21932 V2DF_type_node, V2DF_type_node, NULL_TREE);
21933 tree v2si_ftype_v2df
21934 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21935 tree v4sf_ftype_v2df
21936 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21937 tree v2df_ftype_v2si
21938 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21939 tree v2df_ftype_v4sf
21940 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21941 tree int_ftype_v2df
21942 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21943 tree int64_ftype_v2df
21944 = build_function_type_list (long_long_integer_type_node,
21945 V2DF_type_node, NULL_TREE);
21946 tree v2df_ftype_v2df_int
21947 = build_function_type_list (V2DF_type_node,
21948 V2DF_type_node, integer_type_node, NULL_TREE);
21949 tree v2df_ftype_v2df_int64
21950 = build_function_type_list (V2DF_type_node,
21951 V2DF_type_node, long_long_integer_type_node,
21953 tree v4sf_ftype_v4sf_v2df
21954 = build_function_type_list (V4SF_type_node,
21955 V4SF_type_node, V2DF_type_node, NULL_TREE);
21956 tree v2df_ftype_v2df_v4sf
21957 = build_function_type_list (V2DF_type_node,
21958 V2DF_type_node, V4SF_type_node, NULL_TREE);
21959 tree v2df_ftype_v2df_v2df_int
21960 = build_function_type_list (V2DF_type_node,
21961 V2DF_type_node, V2DF_type_node,
21964 tree v2df_ftype_v2df_pcdouble
21965 = build_function_type_list (V2DF_type_node,
21966 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21967 tree void_ftype_pdouble_v2df
21968 = build_function_type_list (void_type_node,
21969 pdouble_type_node, V2DF_type_node, NULL_TREE);
21970 tree void_ftype_pint_int
21971 = build_function_type_list (void_type_node,
21972 pint_type_node, integer_type_node, NULL_TREE);
21973 tree void_ftype_v16qi_v16qi_pchar
21974 = build_function_type_list (void_type_node,
21975 V16QI_type_node, V16QI_type_node,
21976 pchar_type_node, NULL_TREE);
21977 tree v2df_ftype_pcdouble
21978 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21979 tree v2df_ftype_v2df_v2df
21980 = build_function_type_list (V2DF_type_node,
21981 V2DF_type_node, V2DF_type_node, NULL_TREE);
21982 tree v16qi_ftype_v16qi_v16qi
21983 = build_function_type_list (V16QI_type_node,
21984 V16QI_type_node, V16QI_type_node, NULL_TREE);
21985 tree v8hi_ftype_v8hi_v8hi
21986 = build_function_type_list (V8HI_type_node,
21987 V8HI_type_node, V8HI_type_node, NULL_TREE);
21988 tree v4si_ftype_v4si_v4si
21989 = build_function_type_list (V4SI_type_node,
21990 V4SI_type_node, V4SI_type_node, NULL_TREE);
21991 tree v2di_ftype_v2di_v2di
21992 = build_function_type_list (V2DI_type_node,
21993 V2DI_type_node, V2DI_type_node, NULL_TREE);
21994 tree v2di_ftype_v2df_v2df
21995 = build_function_type_list (V2DI_type_node,
21996 V2DF_type_node, V2DF_type_node, NULL_TREE);
21997 tree v2df_ftype_v2df
21998 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21999 tree v2di_ftype_v2di_int
22000 = build_function_type_list (V2DI_type_node,
22001 V2DI_type_node, integer_type_node, NULL_TREE);
22002 tree v2di_ftype_v2di_v2di_int
22003 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22004 V2DI_type_node, integer_type_node, NULL_TREE);
22005 tree v4si_ftype_v4si_int
22006 = build_function_type_list (V4SI_type_node,
22007 V4SI_type_node, integer_type_node, NULL_TREE);
22008 tree v8hi_ftype_v8hi_int
22009 = build_function_type_list (V8HI_type_node,
22010 V8HI_type_node, integer_type_node, NULL_TREE);
22011 tree v4si_ftype_v8hi_v8hi
22012 = build_function_type_list (V4SI_type_node,
22013 V8HI_type_node, V8HI_type_node, NULL_TREE);
22014 tree v1di_ftype_v8qi_v8qi
22015 = build_function_type_list (V1DI_type_node,
22016 V8QI_type_node, V8QI_type_node, NULL_TREE);
22017 tree v1di_ftype_v2si_v2si
22018 = build_function_type_list (V1DI_type_node,
22019 V2SI_type_node, V2SI_type_node, NULL_TREE);
22020 tree v2di_ftype_v16qi_v16qi
22021 = build_function_type_list (V2DI_type_node,
22022 V16QI_type_node, V16QI_type_node, NULL_TREE);
22023 tree v2di_ftype_v4si_v4si
22024 = build_function_type_list (V2DI_type_node,
22025 V4SI_type_node, V4SI_type_node, NULL_TREE);
22026 tree int_ftype_v16qi
22027 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22028 tree v16qi_ftype_pcchar
22029 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22030 tree void_ftype_pchar_v16qi
22031 = build_function_type_list (void_type_node,
22032 pchar_type_node, V16QI_type_node, NULL_TREE);
22034 tree v2di_ftype_v2di_unsigned_unsigned
22035 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22036 unsigned_type_node, unsigned_type_node,
22038 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22039 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22040 unsigned_type_node, unsigned_type_node,
22042 tree v2di_ftype_v2di_v16qi
22043 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22045 tree v2df_ftype_v2df_v2df_v2df
22046 = build_function_type_list (V2DF_type_node,
22047 V2DF_type_node, V2DF_type_node,
22048 V2DF_type_node, NULL_TREE);
22049 tree v4sf_ftype_v4sf_v4sf_v4sf
22050 = build_function_type_list (V4SF_type_node,
22051 V4SF_type_node, V4SF_type_node,
22052 V4SF_type_node, NULL_TREE);
22053 tree v8hi_ftype_v16qi
22054 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22056 tree v4si_ftype_v16qi
22057 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22059 tree v2di_ftype_v16qi
22060 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22062 tree v4si_ftype_v8hi
22063 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22065 tree v2di_ftype_v8hi
22066 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22068 tree v2di_ftype_v4si
22069 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22071 tree v2di_ftype_pv2di
22072 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22074 tree v16qi_ftype_v16qi_v16qi_int
22075 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22076 V16QI_type_node, integer_type_node,
22078 tree v16qi_ftype_v16qi_v16qi_v16qi
22079 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22080 V16QI_type_node, V16QI_type_node,
22082 tree v8hi_ftype_v8hi_v8hi_int
22083 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22084 V8HI_type_node, integer_type_node,
22086 tree v4si_ftype_v4si_v4si_int
22087 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22088 V4SI_type_node, integer_type_node,
22090 tree int_ftype_v2di_v2di
22091 = build_function_type_list (integer_type_node,
22092 V2DI_type_node, V2DI_type_node,
22094 tree int_ftype_v16qi_int_v16qi_int_int
22095 = build_function_type_list (integer_type_node,
22102 tree v16qi_ftype_v16qi_int_v16qi_int_int
22103 = build_function_type_list (V16QI_type_node,
22110 tree int_ftype_v16qi_v16qi_int
22111 = build_function_type_list (integer_type_node,
22117 /* SSE5 instructions */
22118 tree v2di_ftype_v2di_v2di_v2di
22119 = build_function_type_list (V2DI_type_node,
22125 tree v4si_ftype_v4si_v4si_v4si
22126 = build_function_type_list (V4SI_type_node,
22132 tree v4si_ftype_v4si_v4si_v2di
22133 = build_function_type_list (V4SI_type_node,
22139 tree v8hi_ftype_v8hi_v8hi_v8hi
22140 = build_function_type_list (V8HI_type_node,
22146 tree v8hi_ftype_v8hi_v8hi_v4si
22147 = build_function_type_list (V8HI_type_node,
22153 tree v2df_ftype_v2df_v2df_v16qi
22154 = build_function_type_list (V2DF_type_node,
22160 tree v4sf_ftype_v4sf_v4sf_v16qi
22161 = build_function_type_list (V4SF_type_node,
22167 tree v2di_ftype_v2di_si
22168 = build_function_type_list (V2DI_type_node,
22173 tree v4si_ftype_v4si_si
22174 = build_function_type_list (V4SI_type_node,
22179 tree v8hi_ftype_v8hi_si
22180 = build_function_type_list (V8HI_type_node,
22185 tree v16qi_ftype_v16qi_si
22186 = build_function_type_list (V16QI_type_node,
22190 tree v4sf_ftype_v4hi
22191 = build_function_type_list (V4SF_type_node,
22195 tree v4hi_ftype_v4sf
22196 = build_function_type_list (V4HI_type_node,
22200 tree v2di_ftype_v2di
22201 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22203 tree v16qi_ftype_v8hi_v8hi
22204 = build_function_type_list (V16QI_type_node,
22205 V8HI_type_node, V8HI_type_node,
22207 tree v8hi_ftype_v4si_v4si
22208 = build_function_type_list (V8HI_type_node,
22209 V4SI_type_node, V4SI_type_node,
22211 tree v8hi_ftype_v16qi_v16qi
22212 = build_function_type_list (V8HI_type_node,
22213 V16QI_type_node, V16QI_type_node,
22215 tree v4hi_ftype_v8qi_v8qi
22216 = build_function_type_list (V4HI_type_node,
22217 V8QI_type_node, V8QI_type_node,
22219 tree unsigned_ftype_unsigned_uchar
22220 = build_function_type_list (unsigned_type_node,
22221 unsigned_type_node,
22222 unsigned_char_type_node,
22224 tree unsigned_ftype_unsigned_ushort
22225 = build_function_type_list (unsigned_type_node,
22226 unsigned_type_node,
22227 short_unsigned_type_node,
22229 tree unsigned_ftype_unsigned_unsigned
22230 = build_function_type_list (unsigned_type_node,
22231 unsigned_type_node,
22232 unsigned_type_node,
22234 tree uint64_ftype_uint64_uint64
22235 = build_function_type_list (long_long_unsigned_type_node,
22236 long_long_unsigned_type_node,
22237 long_long_unsigned_type_node,
22239 tree float_ftype_float
22240 = build_function_type_list (float_type_node,
22245 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22247 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22249 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22251 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22253 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22255 tree v8sf_ftype_v8sf
22256 = build_function_type_list (V8SF_type_node,
22259 tree v8si_ftype_v8sf
22260 = build_function_type_list (V8SI_type_node,
22263 tree v8sf_ftype_v8si
22264 = build_function_type_list (V8SF_type_node,
22267 tree v4si_ftype_v4df
22268 = build_function_type_list (V4SI_type_node,
22271 tree v4df_ftype_v4df
22272 = build_function_type_list (V4DF_type_node,
22275 tree v4df_ftype_v4si
22276 = build_function_type_list (V4DF_type_node,
22279 tree v4df_ftype_v4sf
22280 = build_function_type_list (V4DF_type_node,
22283 tree v4sf_ftype_v4df
22284 = build_function_type_list (V4SF_type_node,
22287 tree v8sf_ftype_v8sf_v8sf
22288 = build_function_type_list (V8SF_type_node,
22289 V8SF_type_node, V8SF_type_node,
22291 tree v4df_ftype_v4df_v4df
22292 = build_function_type_list (V4DF_type_node,
22293 V4DF_type_node, V4DF_type_node,
22295 tree v8sf_ftype_v8sf_int
22296 = build_function_type_list (V8SF_type_node,
22297 V8SF_type_node, integer_type_node,
22299 tree v4si_ftype_v8si_int
22300 = build_function_type_list (V4SI_type_node,
22301 V8SI_type_node, integer_type_node,
22303 tree v4df_ftype_v4df_int
22304 = build_function_type_list (V4DF_type_node,
22305 V4DF_type_node, integer_type_node,
22307 tree v4sf_ftype_v8sf_int
22308 = build_function_type_list (V4SF_type_node,
22309 V8SF_type_node, integer_type_node,
22311 tree v2df_ftype_v4df_int
22312 = build_function_type_list (V2DF_type_node,
22313 V4DF_type_node, integer_type_node,
22315 tree v8sf_ftype_v8sf_v8sf_int
22316 = build_function_type_list (V8SF_type_node,
22317 V8SF_type_node, V8SF_type_node,
22320 tree v8sf_ftype_v8sf_v8sf_v8sf
22321 = build_function_type_list (V8SF_type_node,
22322 V8SF_type_node, V8SF_type_node,
22325 tree v4df_ftype_v4df_v4df_v4df
22326 = build_function_type_list (V4DF_type_node,
22327 V4DF_type_node, V4DF_type_node,
22330 tree v8si_ftype_v8si_v8si_int
22331 = build_function_type_list (V8SI_type_node,
22332 V8SI_type_node, V8SI_type_node,
22335 tree v4df_ftype_v4df_v4df_int
22336 = build_function_type_list (V4DF_type_node,
22337 V4DF_type_node, V4DF_type_node,
22340 tree v8sf_ftype_pcfloat
22341 = build_function_type_list (V8SF_type_node,
22344 tree v4df_ftype_pcdouble
22345 = build_function_type_list (V4DF_type_node,
22346 pcdouble_type_node,
22348 tree pcv4sf_type_node
22349 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22350 tree pcv2df_type_node
22351 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22352 tree v8sf_ftype_pcv4sf
22353 = build_function_type_list (V8SF_type_node,
22356 tree v4df_ftype_pcv2df
22357 = build_function_type_list (V4DF_type_node,
22360 tree v32qi_ftype_pcchar
22361 = build_function_type_list (V32QI_type_node,
22364 tree void_ftype_pchar_v32qi
22365 = build_function_type_list (void_type_node,
22366 pchar_type_node, V32QI_type_node,
22368 tree v8si_ftype_v8si_v4si_int
22369 = build_function_type_list (V8SI_type_node,
22370 V8SI_type_node, V4SI_type_node,
22373 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22374 tree void_ftype_pv4di_v4di
22375 = build_function_type_list (void_type_node,
22376 pv4di_type_node, V4DI_type_node,
22378 tree v8sf_ftype_v8sf_v4sf_int
22379 = build_function_type_list (V8SF_type_node,
22380 V8SF_type_node, V4SF_type_node,
22383 tree v4df_ftype_v4df_v2df_int
22384 = build_function_type_list (V4DF_type_node,
22385 V4DF_type_node, V2DF_type_node,
22388 tree void_ftype_pfloat_v8sf
22389 = build_function_type_list (void_type_node,
22390 pfloat_type_node, V8SF_type_node,
22392 tree void_ftype_pdouble_v4df
22393 = build_function_type_list (void_type_node,
22394 pdouble_type_node, V4DF_type_node,
22396 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22397 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22398 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22399 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22400 tree pcv8sf_type_node
22401 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22402 tree pcv4df_type_node
22403 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22404 tree v8sf_ftype_pcv8sf_v8sf
22405 = build_function_type_list (V8SF_type_node,
22406 pcv8sf_type_node, V8SF_type_node,
22408 tree v4df_ftype_pcv4df_v4df
22409 = build_function_type_list (V4DF_type_node,
22410 pcv4df_type_node, V4DF_type_node,
22412 tree v4sf_ftype_pcv4sf_v4sf
22413 = build_function_type_list (V4SF_type_node,
22414 pcv4sf_type_node, V4SF_type_node,
22416 tree v2df_ftype_pcv2df_v2df
22417 = build_function_type_list (V2DF_type_node,
22418 pcv2df_type_node, V2DF_type_node,
22420 tree void_ftype_pv8sf_v8sf_v8sf
22421 = build_function_type_list (void_type_node,
22422 pv8sf_type_node, V8SF_type_node,
22425 tree void_ftype_pv4df_v4df_v4df
22426 = build_function_type_list (void_type_node,
22427 pv4df_type_node, V4DF_type_node,
22430 tree void_ftype_pv4sf_v4sf_v4sf
22431 = build_function_type_list (void_type_node,
22432 pv4sf_type_node, V4SF_type_node,
22435 tree void_ftype_pv2df_v2df_v2df
22436 = build_function_type_list (void_type_node,
22437 pv2df_type_node, V2DF_type_node,
22440 tree v4df_ftype_v2df
22441 = build_function_type_list (V4DF_type_node,
22444 tree v8sf_ftype_v4sf
22445 = build_function_type_list (V8SF_type_node,
22448 tree v8si_ftype_v4si
22449 = build_function_type_list (V8SI_type_node,
22452 tree v2df_ftype_v4df
22453 = build_function_type_list (V2DF_type_node,
22456 tree v4sf_ftype_v8sf
22457 = build_function_type_list (V4SF_type_node,
22460 tree v4si_ftype_v8si
22461 = build_function_type_list (V4SI_type_node,
22464 tree int_ftype_v4df
22465 = build_function_type_list (integer_type_node,
22468 tree int_ftype_v8sf
22469 = build_function_type_list (integer_type_node,
22472 tree int_ftype_v8sf_v8sf
22473 = build_function_type_list (integer_type_node,
22474 V8SF_type_node, V8SF_type_node,
22476 tree int_ftype_v4di_v4di
22477 = build_function_type_list (integer_type_node,
22478 V4DI_type_node, V4DI_type_node,
22480 tree int_ftype_v4df_v4df
22481 = build_function_type_list (integer_type_node,
22482 V4DF_type_node, V4DF_type_node,
22484 tree v8sf_ftype_v8sf_v8si
22485 = build_function_type_list (V8SF_type_node,
22486 V8SF_type_node, V8SI_type_node,
22488 tree v4df_ftype_v4df_v4di
22489 = build_function_type_list (V4DF_type_node,
22490 V4DF_type_node, V4DI_type_node,
22492 tree v4sf_ftype_v4sf_v4si
22493 = build_function_type_list (V4SF_type_node,
22494 V4SF_type_node, V4SI_type_node, NULL_TREE);
22495 tree v2df_ftype_v2df_v2di
22496 = build_function_type_list (V2DF_type_node,
22497 V2DF_type_node, V2DI_type_node, NULL_TREE);
22501 /* Add all special builtins with variable number of operands. */
22502 for (i = 0, d = bdesc_special_args;
22503 i < ARRAY_SIZE (bdesc_special_args);
22511 switch ((enum ix86_special_builtin_type) d->flag)
22513 case VOID_FTYPE_VOID:
22514 type = void_ftype_void;
22516 case V32QI_FTYPE_PCCHAR:
22517 type = v32qi_ftype_pcchar;
22519 case V16QI_FTYPE_PCCHAR:
22520 type = v16qi_ftype_pcchar;
22522 case V8SF_FTYPE_PCV4SF:
22523 type = v8sf_ftype_pcv4sf;
22525 case V8SF_FTYPE_PCFLOAT:
22526 type = v8sf_ftype_pcfloat;
22528 case V4DF_FTYPE_PCV2DF:
22529 type = v4df_ftype_pcv2df;
22531 case V4DF_FTYPE_PCDOUBLE:
22532 type = v4df_ftype_pcdouble;
22534 case V4SF_FTYPE_PCFLOAT:
22535 type = v4sf_ftype_pcfloat;
22537 case V2DI_FTYPE_PV2DI:
22538 type = v2di_ftype_pv2di;
22540 case V2DF_FTYPE_PCDOUBLE:
22541 type = v2df_ftype_pcdouble;
22543 case V8SF_FTYPE_PCV8SF_V8SF:
22544 type = v8sf_ftype_pcv8sf_v8sf;
22546 case V4DF_FTYPE_PCV4DF_V4DF:
22547 type = v4df_ftype_pcv4df_v4df;
22549 case V4SF_FTYPE_V4SF_PCV2SF:
22550 type = v4sf_ftype_v4sf_pcv2sf;
22552 case V4SF_FTYPE_PCV4SF_V4SF:
22553 type = v4sf_ftype_pcv4sf_v4sf;
22555 case V2DF_FTYPE_V2DF_PCDOUBLE:
22556 type = v2df_ftype_v2df_pcdouble;
22558 case V2DF_FTYPE_PCV2DF_V2DF:
22559 type = v2df_ftype_pcv2df_v2df;
22561 case VOID_FTYPE_PV2SF_V4SF:
22562 type = void_ftype_pv2sf_v4sf;
22564 case VOID_FTYPE_PV4DI_V4DI:
22565 type = void_ftype_pv4di_v4di;
22567 case VOID_FTYPE_PV2DI_V2DI:
22568 type = void_ftype_pv2di_v2di;
22570 case VOID_FTYPE_PCHAR_V32QI:
22571 type = void_ftype_pchar_v32qi;
22573 case VOID_FTYPE_PCHAR_V16QI:
22574 type = void_ftype_pchar_v16qi;
22576 case VOID_FTYPE_PFLOAT_V8SF:
22577 type = void_ftype_pfloat_v8sf;
22579 case VOID_FTYPE_PFLOAT_V4SF:
22580 type = void_ftype_pfloat_v4sf;
22582 case VOID_FTYPE_PDOUBLE_V4DF:
22583 type = void_ftype_pdouble_v4df;
22585 case VOID_FTYPE_PDOUBLE_V2DF:
22586 type = void_ftype_pdouble_v2df;
22588 case VOID_FTYPE_PDI_DI:
22589 type = void_ftype_pdi_di;
22591 case VOID_FTYPE_PINT_INT:
22592 type = void_ftype_pint_int;
22594 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22595 type = void_ftype_pv8sf_v8sf_v8sf;
22597 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22598 type = void_ftype_pv4df_v4df_v4df;
22600 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22601 type = void_ftype_pv4sf_v4sf_v4sf;
22603 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22604 type = void_ftype_pv2df_v2df_v2df;
22607 gcc_unreachable ();
22610 def_builtin (d->mask, d->name, type, d->code);
22613 /* Add all builtins with variable number of operands. */
22614 for (i = 0, d = bdesc_args;
22615 i < ARRAY_SIZE (bdesc_args);
22623 switch ((enum ix86_builtin_type) d->flag)
22625 case FLOAT_FTYPE_FLOAT:
22626 type = float_ftype_float;
22628 case INT_FTYPE_V8SF_V8SF_PTEST:
22629 type = int_ftype_v8sf_v8sf;
22631 case INT_FTYPE_V4DI_V4DI_PTEST:
22632 type = int_ftype_v4di_v4di;
22634 case INT_FTYPE_V4DF_V4DF_PTEST:
22635 type = int_ftype_v4df_v4df;
22637 case INT_FTYPE_V4SF_V4SF_PTEST:
22638 type = int_ftype_v4sf_v4sf;
22640 case INT_FTYPE_V2DI_V2DI_PTEST:
22641 type = int_ftype_v2di_v2di;
22643 case INT_FTYPE_V2DF_V2DF_PTEST:
22644 type = int_ftype_v2df_v2df;
22646 case INT64_FTYPE_V4SF:
22647 type = int64_ftype_v4sf;
22649 case INT64_FTYPE_V2DF:
22650 type = int64_ftype_v2df;
22652 case INT_FTYPE_V16QI:
22653 type = int_ftype_v16qi;
22655 case INT_FTYPE_V8QI:
22656 type = int_ftype_v8qi;
22658 case INT_FTYPE_V8SF:
22659 type = int_ftype_v8sf;
22661 case INT_FTYPE_V4DF:
22662 type = int_ftype_v4df;
22664 case INT_FTYPE_V4SF:
22665 type = int_ftype_v4sf;
22667 case INT_FTYPE_V2DF:
22668 type = int_ftype_v2df;
22670 case V16QI_FTYPE_V16QI:
22671 type = v16qi_ftype_v16qi;
22673 case V8SI_FTYPE_V8SF:
22674 type = v8si_ftype_v8sf;
22676 case V8SI_FTYPE_V4SI:
22677 type = v8si_ftype_v4si;
22679 case V8HI_FTYPE_V8HI:
22680 type = v8hi_ftype_v8hi;
22682 case V8HI_FTYPE_V16QI:
22683 type = v8hi_ftype_v16qi;
22685 case V8QI_FTYPE_V8QI:
22686 type = v8qi_ftype_v8qi;
22688 case V8SF_FTYPE_V8SF:
22689 type = v8sf_ftype_v8sf;
22691 case V8SF_FTYPE_V8SI:
22692 type = v8sf_ftype_v8si;
22694 case V8SF_FTYPE_V4SF:
22695 type = v8sf_ftype_v4sf;
22697 case V4SI_FTYPE_V4DF:
22698 type = v4si_ftype_v4df;
22700 case V4SI_FTYPE_V4SI:
22701 type = v4si_ftype_v4si;
22703 case V4SI_FTYPE_V16QI:
22704 type = v4si_ftype_v16qi;
22706 case V4SI_FTYPE_V8SI:
22707 type = v4si_ftype_v8si;
22709 case V4SI_FTYPE_V8HI:
22710 type = v4si_ftype_v8hi;
22712 case V4SI_FTYPE_V4SF:
22713 type = v4si_ftype_v4sf;
22715 case V4SI_FTYPE_V2DF:
22716 type = v4si_ftype_v2df;
22718 case V4HI_FTYPE_V4HI:
22719 type = v4hi_ftype_v4hi;
22721 case V4DF_FTYPE_V4DF:
22722 type = v4df_ftype_v4df;
22724 case V4DF_FTYPE_V4SI:
22725 type = v4df_ftype_v4si;
22727 case V4DF_FTYPE_V4SF:
22728 type = v4df_ftype_v4sf;
22730 case V4DF_FTYPE_V2DF:
22731 type = v4df_ftype_v2df;
22733 case V4SF_FTYPE_V4SF:
22734 case V4SF_FTYPE_V4SF_VEC_MERGE:
22735 type = v4sf_ftype_v4sf;
22737 case V4SF_FTYPE_V8SF:
22738 type = v4sf_ftype_v8sf;
22740 case V4SF_FTYPE_V4SI:
22741 type = v4sf_ftype_v4si;
22743 case V4SF_FTYPE_V4DF:
22744 type = v4sf_ftype_v4df;
22746 case V4SF_FTYPE_V2DF:
22747 type = v4sf_ftype_v2df;
22749 case V2DI_FTYPE_V2DI:
22750 type = v2di_ftype_v2di;
22752 case V2DI_FTYPE_V16QI:
22753 type = v2di_ftype_v16qi;
22755 case V2DI_FTYPE_V8HI:
22756 type = v2di_ftype_v8hi;
22758 case V2DI_FTYPE_V4SI:
22759 type = v2di_ftype_v4si;
22761 case V2SI_FTYPE_V2SI:
22762 type = v2si_ftype_v2si;
22764 case V2SI_FTYPE_V4SF:
22765 type = v2si_ftype_v4sf;
22767 case V2SI_FTYPE_V2DF:
22768 type = v2si_ftype_v2df;
22770 case V2SI_FTYPE_V2SF:
22771 type = v2si_ftype_v2sf;
22773 case V2DF_FTYPE_V4DF:
22774 type = v2df_ftype_v4df;
22776 case V2DF_FTYPE_V4SF:
22777 type = v2df_ftype_v4sf;
22779 case V2DF_FTYPE_V2DF:
22780 case V2DF_FTYPE_V2DF_VEC_MERGE:
22781 type = v2df_ftype_v2df;
22783 case V2DF_FTYPE_V2SI:
22784 type = v2df_ftype_v2si;
22786 case V2DF_FTYPE_V4SI:
22787 type = v2df_ftype_v4si;
22789 case V2SF_FTYPE_V2SF:
22790 type = v2sf_ftype_v2sf;
22792 case V2SF_FTYPE_V2SI:
22793 type = v2sf_ftype_v2si;
22795 case V16QI_FTYPE_V16QI_V16QI:
22796 type = v16qi_ftype_v16qi_v16qi;
22798 case V16QI_FTYPE_V8HI_V8HI:
22799 type = v16qi_ftype_v8hi_v8hi;
22801 case V8QI_FTYPE_V8QI_V8QI:
22802 type = v8qi_ftype_v8qi_v8qi;
22804 case V8QI_FTYPE_V4HI_V4HI:
22805 type = v8qi_ftype_v4hi_v4hi;
22807 case V8HI_FTYPE_V8HI_V8HI:
22808 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22809 type = v8hi_ftype_v8hi_v8hi;
22811 case V8HI_FTYPE_V16QI_V16QI:
22812 type = v8hi_ftype_v16qi_v16qi;
22814 case V8HI_FTYPE_V4SI_V4SI:
22815 type = v8hi_ftype_v4si_v4si;
22817 case V8HI_FTYPE_V8HI_SI_COUNT:
22818 type = v8hi_ftype_v8hi_int;
22820 case V8SF_FTYPE_V8SF_V8SF:
22821 type = v8sf_ftype_v8sf_v8sf;
22823 case V8SF_FTYPE_V8SF_V8SI:
22824 type = v8sf_ftype_v8sf_v8si;
22826 case V4SI_FTYPE_V4SI_V4SI:
22827 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22828 type = v4si_ftype_v4si_v4si;
22830 case V4SI_FTYPE_V8HI_V8HI:
22831 type = v4si_ftype_v8hi_v8hi;
22833 case V4SI_FTYPE_V4SF_V4SF:
22834 type = v4si_ftype_v4sf_v4sf;
22836 case V4SI_FTYPE_V2DF_V2DF:
22837 type = v4si_ftype_v2df_v2df;
22839 case V4SI_FTYPE_V4SI_SI_COUNT:
22840 type = v4si_ftype_v4si_int;
22842 case V4HI_FTYPE_V4HI_V4HI:
22843 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22844 type = v4hi_ftype_v4hi_v4hi;
22846 case V4HI_FTYPE_V8QI_V8QI:
22847 type = v4hi_ftype_v8qi_v8qi;
22849 case V4HI_FTYPE_V2SI_V2SI:
22850 type = v4hi_ftype_v2si_v2si;
22852 case V4HI_FTYPE_V4HI_SI_COUNT:
22853 type = v4hi_ftype_v4hi_int;
22855 case V4DF_FTYPE_V4DF_V4DF:
22856 type = v4df_ftype_v4df_v4df;
22858 case V4DF_FTYPE_V4DF_V4DI:
22859 type = v4df_ftype_v4df_v4di;
22861 case V4SF_FTYPE_V4SF_V4SF:
22862 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22863 type = v4sf_ftype_v4sf_v4sf;
22865 case V4SF_FTYPE_V4SF_V4SI:
22866 type = v4sf_ftype_v4sf_v4si;
22868 case V4SF_FTYPE_V4SF_V2SI:
22869 type = v4sf_ftype_v4sf_v2si;
22871 case V4SF_FTYPE_V4SF_V2DF:
22872 type = v4sf_ftype_v4sf_v2df;
22874 case V4SF_FTYPE_V4SF_DI:
22875 type = v4sf_ftype_v4sf_int64;
22877 case V4SF_FTYPE_V4SF_SI:
22878 type = v4sf_ftype_v4sf_int;
22880 case V2DI_FTYPE_V2DI_V2DI:
22881 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22882 type = v2di_ftype_v2di_v2di;
22884 case V2DI_FTYPE_V16QI_V16QI:
22885 type = v2di_ftype_v16qi_v16qi;
22887 case V2DI_FTYPE_V4SI_V4SI:
22888 type = v2di_ftype_v4si_v4si;
22890 case V2DI_FTYPE_V2DI_V16QI:
22891 type = v2di_ftype_v2di_v16qi;
22893 case V2DI_FTYPE_V2DF_V2DF:
22894 type = v2di_ftype_v2df_v2df;
22896 case V2DI_FTYPE_V2DI_SI_COUNT:
22897 type = v2di_ftype_v2di_int;
22899 case V2SI_FTYPE_V2SI_V2SI:
22900 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22901 type = v2si_ftype_v2si_v2si;
22903 case V2SI_FTYPE_V4HI_V4HI:
22904 type = v2si_ftype_v4hi_v4hi;
22906 case V2SI_FTYPE_V2SF_V2SF:
22907 type = v2si_ftype_v2sf_v2sf;
22909 case V2SI_FTYPE_V2SI_SI_COUNT:
22910 type = v2si_ftype_v2si_int;
22912 case V2DF_FTYPE_V2DF_V2DF:
22913 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22914 type = v2df_ftype_v2df_v2df;
22916 case V2DF_FTYPE_V2DF_V4SF:
22917 type = v2df_ftype_v2df_v4sf;
22919 case V2DF_FTYPE_V2DF_V2DI:
22920 type = v2df_ftype_v2df_v2di;
22922 case V2DF_FTYPE_V2DF_DI:
22923 type = v2df_ftype_v2df_int64;
22925 case V2DF_FTYPE_V2DF_SI:
22926 type = v2df_ftype_v2df_int;
22928 case V2SF_FTYPE_V2SF_V2SF:
22929 type = v2sf_ftype_v2sf_v2sf;
22931 case V1DI_FTYPE_V1DI_V1DI:
22932 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22933 type = v1di_ftype_v1di_v1di;
22935 case V1DI_FTYPE_V8QI_V8QI:
22936 type = v1di_ftype_v8qi_v8qi;
22938 case V1DI_FTYPE_V2SI_V2SI:
22939 type = v1di_ftype_v2si_v2si;
22941 case V1DI_FTYPE_V1DI_SI_COUNT:
22942 type = v1di_ftype_v1di_int;
22944 case UINT64_FTYPE_UINT64_UINT64:
22945 type = uint64_ftype_uint64_uint64;
22947 case UINT_FTYPE_UINT_UINT:
22948 type = unsigned_ftype_unsigned_unsigned;
22950 case UINT_FTYPE_UINT_USHORT:
22951 type = unsigned_ftype_unsigned_ushort;
22953 case UINT_FTYPE_UINT_UCHAR:
22954 type = unsigned_ftype_unsigned_uchar;
22956 case V8HI_FTYPE_V8HI_INT:
22957 type = v8hi_ftype_v8hi_int;
22959 case V8SF_FTYPE_V8SF_INT:
22960 type = v8sf_ftype_v8sf_int;
22962 case V4SI_FTYPE_V4SI_INT:
22963 type = v4si_ftype_v4si_int;
22965 case V4SI_FTYPE_V8SI_INT:
22966 type = v4si_ftype_v8si_int;
22968 case V4HI_FTYPE_V4HI_INT:
22969 type = v4hi_ftype_v4hi_int;
22971 case V4DF_FTYPE_V4DF_INT:
22972 type = v4df_ftype_v4df_int;
22974 case V4SF_FTYPE_V4SF_INT:
22975 type = v4sf_ftype_v4sf_int;
22977 case V4SF_FTYPE_V8SF_INT:
22978 type = v4sf_ftype_v8sf_int;
22980 case V2DI_FTYPE_V2DI_INT:
22981 case V2DI2TI_FTYPE_V2DI_INT:
22982 type = v2di_ftype_v2di_int;
22984 case V2DF_FTYPE_V2DF_INT:
22985 type = v2df_ftype_v2df_int;
22987 case V2DF_FTYPE_V4DF_INT:
22988 type = v2df_ftype_v4df_int;
22990 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22991 type = v16qi_ftype_v16qi_v16qi_v16qi;
22993 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22994 type = v8sf_ftype_v8sf_v8sf_v8sf;
22996 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22997 type = v4df_ftype_v4df_v4df_v4df;
22999 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23000 type = v4sf_ftype_v4sf_v4sf_v4sf;
23002 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23003 type = v2df_ftype_v2df_v2df_v2df;
23005 case V16QI_FTYPE_V16QI_V16QI_INT:
23006 type = v16qi_ftype_v16qi_v16qi_int;
23008 case V8SI_FTYPE_V8SI_V8SI_INT:
23009 type = v8si_ftype_v8si_v8si_int;
23011 case V8SI_FTYPE_V8SI_V4SI_INT:
23012 type = v8si_ftype_v8si_v4si_int;
23014 case V8HI_FTYPE_V8HI_V8HI_INT:
23015 type = v8hi_ftype_v8hi_v8hi_int;
23017 case V8SF_FTYPE_V8SF_V8SF_INT:
23018 type = v8sf_ftype_v8sf_v8sf_int;
23020 case V8SF_FTYPE_V8SF_V4SF_INT:
23021 type = v8sf_ftype_v8sf_v4sf_int;
23023 case V4SI_FTYPE_V4SI_V4SI_INT:
23024 type = v4si_ftype_v4si_v4si_int;
23026 case V4DF_FTYPE_V4DF_V4DF_INT:
23027 type = v4df_ftype_v4df_v4df_int;
23029 case V4DF_FTYPE_V4DF_V2DF_INT:
23030 type = v4df_ftype_v4df_v2df_int;
23032 case V4SF_FTYPE_V4SF_V4SF_INT:
23033 type = v4sf_ftype_v4sf_v4sf_int;
23035 case V2DI_FTYPE_V2DI_V2DI_INT:
23036 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23037 type = v2di_ftype_v2di_v2di_int;
23039 case V2DF_FTYPE_V2DF_V2DF_INT:
23040 type = v2df_ftype_v2df_v2df_int;
23042 case V2DI_FTYPE_V2DI_UINT_UINT:
23043 type = v2di_ftype_v2di_unsigned_unsigned;
23045 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23046 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23048 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23049 type = v1di_ftype_v1di_v1di_int;
23052 gcc_unreachable ();
23055 def_builtin_const (d->mask, d->name, type, d->code);
23058 /* pcmpestr[im] insns. */
23059 for (i = 0, d = bdesc_pcmpestr;
23060 i < ARRAY_SIZE (bdesc_pcmpestr);
23063 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23064 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23066 ftype = int_ftype_v16qi_int_v16qi_int_int;
23067 def_builtin_const (d->mask, d->name, ftype, d->code);
23070 /* pcmpistr[im] insns. */
23071 for (i = 0, d = bdesc_pcmpistr;
23072 i < ARRAY_SIZE (bdesc_pcmpistr);
23075 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23076 ftype = v16qi_ftype_v16qi_v16qi_int;
23078 ftype = int_ftype_v16qi_v16qi_int;
23079 def_builtin_const (d->mask, d->name, ftype, d->code);
23082 /* comi/ucomi insns. */
23083 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23084 if (d->mask == OPTION_MASK_ISA_SSE2)
23085 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23087 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23090 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23091 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23093 /* SSE or 3DNow!A */
23094 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23097 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23099 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23100 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23103 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23104 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23107 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23108 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23109 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23110 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23111 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23112 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23115 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23118 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23119 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23121 /* Access to the vec_init patterns. */
23122 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23123 integer_type_node, NULL_TREE);
23124 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23126 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23127 short_integer_type_node,
23128 short_integer_type_node,
23129 short_integer_type_node, NULL_TREE);
23130 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23132 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23133 char_type_node, char_type_node,
23134 char_type_node, char_type_node,
23135 char_type_node, char_type_node,
23136 char_type_node, NULL_TREE);
23137 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23139 /* Access to the vec_extract patterns. */
23140 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23141 integer_type_node, NULL_TREE);
23142 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23144 ftype = build_function_type_list (long_long_integer_type_node,
23145 V2DI_type_node, integer_type_node,
23147 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23149 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23150 integer_type_node, NULL_TREE);
23151 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23153 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23154 integer_type_node, NULL_TREE);
23155 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23157 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23158 integer_type_node, NULL_TREE);
23159 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23161 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23162 integer_type_node, NULL_TREE);
23163 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23165 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23166 integer_type_node, NULL_TREE);
23167 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23169 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23170 integer_type_node, NULL_TREE);
23171 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23173 /* Access to the vec_set patterns. */
23174 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23176 integer_type_node, NULL_TREE);
23177 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23179 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23181 integer_type_node, NULL_TREE);
23182 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23184 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23186 integer_type_node, NULL_TREE);
23187 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23189 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23191 integer_type_node, NULL_TREE);
23192 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23194 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23196 integer_type_node, NULL_TREE);
23197 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23199 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23201 integer_type_node, NULL_TREE);
23202 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23204 /* Add SSE5 multi-arg argument instructions */
23205 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23207 tree mtype = NULL_TREE;
23212 switch ((enum multi_arg_type)d->flag)
23214 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23215 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23216 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23217 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23218 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23219 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23220 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23221 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23222 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23223 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23224 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23225 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23226 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23227 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23228 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23229 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23230 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23231 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23232 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23233 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23234 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23235 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23236 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23237 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23238 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23239 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23240 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23241 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23242 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23243 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23244 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23245 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23246 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23247 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23248 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23249 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23250 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23251 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23252 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23253 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23254 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23255 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23256 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23257 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23258 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23259 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23260 case MULTI_ARG_UNKNOWN:
23262 gcc_unreachable ();
23266 def_builtin_const (d->mask, d->name, mtype, d->code);
23270 /* Internal method for ix86_init_builtins. */
/* Register __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} so that code compiled for one
   ABI can still manipulate variable-argument lists of the other
   (64-bit Windows "ms_abi" vs. System V "sysv_abi").
   NOTE(review): this view is elided — the storage class/return type,
   braces, and several statements (e.g. the fnvoid_va_end_ms
   assignment and some argument lines) are not visible here.  */
23273 ix86_init_builtins_va_builtins_abi (void)
/* Function-type trees for each builtin; the *_ref types are the
   "by reference" va_list parameter types.  */
23275 tree ms_va_ref, sysv_va_ref;
23276 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23277 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23278 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23279 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists pin the matching calling convention onto each
   builtin declaration below.  */
23283 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23284 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23285 ms_va_ref = build_reference_type (ms_va_list_type_node);
23287 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* va_start variants are varargs function types; va_end/va_copy take a
   fixed argument list.  */
23290 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23291 fnvoid_va_start_ms =
23292 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23293 fnvoid_va_end_sysv =
23294 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23295 fnvoid_va_start_sysv =
23296 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23298 fnvoid_va_copy_ms =
23299 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23301 fnvoid_va_copy_sysv =
23302 build_function_type_list (void_type_node, sysv_va_ref,
23303 sysv_va_ref, NULL_TREE);
/* Each builtin maps onto the generic BUILT_IN_VA_* code so the
   middle end expands it like the standard va_* builtins, but with the
   ABI attribute attached.  */
23305 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23306 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23307 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23308 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23309 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23310 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23311 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23312 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23313 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23314 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23315 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23316 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level TARGET_INIT_BUILTINS hook body: registers the x86 extended
   float types (__float80, __float128), the TFmode helper builtins that
   libgcc relies on, then delegates to the MMX/SSE and per-ABI va_list
   initializers.
   NOTE(review): this view is elided — the return type, braces, some
   declarations (e.g. ftype/decl) and trailing argument lines are not
   visible here.  */
23320 ix86_init_builtins (void)
23322 tree float128_type_node = make_node (REAL_TYPE);
/* If long double is already the 80-bit XFmode type, just re-register
   it under the __float80 name; otherwise build a distinct 80-bit
   REAL_TYPE for it.  */
23325 /* The __float80 type. */
23326 if (TYPE_MODE (long_double_type_node) == XFmode)
23327 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23331 /* The __float80 type. */
23332 tree float80_type_node = make_node (REAL_TYPE);
23334 TYPE_PRECISION (float80_type_node) = 80;
23335 layout_type (float80_type_node);
23336 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23340 /* The __float128 type. */
23341 TYPE_PRECISION (float128_type_node) = 128;
23342 layout_type (float128_type_node);
23343 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23346 /* TFmode support builtins. */
23347 ftype = build_function_type (float128_type_node, void_list_node);
23348 decl = add_builtin_function ("__builtin_infq", ftype,
23349 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23351 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23353 /* We will expand them to normal call if SSE2 isn't available since
23354 they are used by libgcc. */
23355 ftype = build_function_type_list (float128_type_node,
23356 float128_type_node,
/* __builtin_fabsq falls back to the libgcc routine __fabstf2.  */
23358 decl = add_builtin_function ("__builtin_fabsq", ftype,
23359 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23360 "__fabstf2", NULL_TREE);
23361 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure function: result depends only on arguments.  */
23362 TREE_READONLY (decl) = 1;
23364 ftype = build_function_type_list (float128_type_node,
23365 float128_type_node,
23366 float128_type_node,
/* __builtin_copysignq falls back to libgcc's __copysigntf3.  */
23368 decl = add_builtin_function ("__builtin_copysignq", ftype,
23369 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23370 "__copysigntf3", NULL_TREE);
23371 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23372 TREE_READONLY (decl) = 1;
23374 ix86_init_mmx_sse_builtins ();
23376 ix86_init_builtins_va_builtins_abi ();
23379 /* Errors in the source file can cause expand_expr to return const0_rtx
23380    where we expect a vector.  To avoid crashing, use one of the vector
23381    clear instructions.  */
/* Substitute an all-zero vector constant of MODE when X is the scalar
   const0_rtx.  NOTE(review): the return statement and braces are
   elided from this view; presumably the function returns X.  */
23383 safe_vector_operand (rtx x, enum machine_mode mode)
23385   if (x == const0_rtx)
23386     x = CONST0_RTX (mode);
23390 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand the two-argument builtin call EXP through insn pattern ICODE.
   TARGET is reused as the destination only when its mode matches the
   pattern's output mode and it satisfies the output predicate;
   otherwise a fresh pseudo is allocated.
   NOTE(review): this view is elided — the return type, braces, the
   `rtx pat` declaration and the final emit/return are not visible.  */
23393 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23396 tree arg0 = CALL_EXPR_ARG (exp, 0);
23397 tree arg1 = CALL_EXPR_ARG (exp, 1);
23398 rtx op0 = expand_normal (arg0);
23399 rtx op1 = expand_normal (arg1);
/* Operand modes come from the insn pattern itself.  */
23400 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23401 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23402 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace erroneous const0_rtx operands with real zero vectors.  */
23404 if (VECTOR_MODE_P (mode0))
23405 op0 = safe_vector_operand (op0, mode0);
23406 if (VECTOR_MODE_P (mode1))
23407 op1 = safe_vector_operand (op1, mode1);
23409 if (optimize || !target
23410 || GET_MODE (target) != tmode
23411 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23412 target = gen_reg_rtx (tmode);
/* An SImode source feeding a TImode pattern operand is first loaded
   into the low element of a V4SI register, then viewed as TImode.  */
23414 if (GET_MODE (op1) == SImode && mode1 == TImode)
23416 rtx x = gen_reg_rtx (V4SImode);
23417 emit_insn (gen_sse2_loadd (x, op1));
23418 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern predicates reject
   them in their current form.  */
23421 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23422 op0 = copy_to_mode_reg (mode0, op0);
23423 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23424 op1 = copy_to_mode_reg (mode1, op1);
23426 pat = GEN_FCN (icode) (target, op0, op1);
23435 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP via pattern ICODE.
   M_TYPE selects how many operands the builtin takes and whether the
   last argument is an immediate, whether the pattern encodes a
   comparison, etc.; SUB_CODE carries the comparison/sub-operation code
   for the _CMP/_TF variants.
   NOTE(review): this view is elided — the return type, braces, the
   args[] array declaration, `int nargs`, several `break;`/`nargs = N;`
   lines inside the switch, and the final emit/return are missing.  */
23438 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23439 enum multi_arg_type m_type,
23440 enum insn_code sub_code)
23445 bool comparison_p = false;
23447 bool last_arg_constant = false;
23448 int num_memory = 0;
23451 enum machine_mode mode;
23454 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
23458 case MULTI_ARG_3_SF:
23459 case MULTI_ARG_3_DF:
23460 case MULTI_ARG_3_DI:
23461 case MULTI_ARG_3_SI:
23462 case MULTI_ARG_3_SI_DI:
23463 case MULTI_ARG_3_HI:
23464 case MULTI_ARG_3_HI_SI:
23465 case MULTI_ARG_3_QI:
23466 case MULTI_ARG_3_PERMPS:
23467 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
23471 case MULTI_ARG_2_SF:
23472 case MULTI_ARG_2_DF:
23473 case MULTI_ARG_2_DI:
23474 case MULTI_ARG_2_SI:
23475 case MULTI_ARG_2_HI:
23476 case MULTI_ARG_2_QI:
/* Two-operand forms whose second operand must be an immediate.  */
23480 case MULTI_ARG_2_DI_IMM:
23481 case MULTI_ARG_2_SI_IMM:
23482 case MULTI_ARG_2_HI_IMM:
23483 case MULTI_ARG_2_QI_IMM:
23485 last_arg_constant = true;
/* Unary forms (including the widening/conversion variants).  */
23488 case MULTI_ARG_1_SF:
23489 case MULTI_ARG_1_DF:
23490 case MULTI_ARG_1_DI:
23491 case MULTI_ARG_1_SI:
23492 case MULTI_ARG_1_HI:
23493 case MULTI_ARG_1_QI:
23494 case MULTI_ARG_1_SI_DI:
23495 case MULTI_ARG_1_HI_DI:
23496 case MULTI_ARG_1_HI_SI:
23497 case MULTI_ARG_1_QI_DI:
23498 case MULTI_ARG_1_QI_SI:
23499 case MULTI_ARG_1_QI_HI:
23500 case MULTI_ARG_1_PH2PS:
23501 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the pattern embeds a comparison rtx built from
   SUB_CODE (see the comparison_p branch below).  */
23505 case MULTI_ARG_2_SF_CMP:
23506 case MULTI_ARG_2_DF_CMP:
23507 case MULTI_ARG_2_DI_CMP:
23508 case MULTI_ARG_2_SI_CMP:
23509 case MULTI_ARG_2_HI_CMP:
23510 case MULTI_ARG_2_QI_CMP:
23512 comparison_p = true;
/* Test forms: SUB_CODE is passed as an integer operand instead.  */
23515 case MULTI_ARG_2_SF_TF:
23516 case MULTI_ARG_2_DF_TF:
23517 case MULTI_ARG_2_DI_TF:
23518 case MULTI_ARG_2_SI_TF:
23519 case MULTI_ARG_2_HI_TF:
23520 case MULTI_ARG_2_QI_TF:
23525 case MULTI_ARG_UNKNOWN:
23527 gcc_unreachable ();
23530 if (optimize || !target
23531 || GET_MODE (target) != tmode
23532 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23533 target = gen_reg_rtx (tmode);
23535 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparison patterns
   the comparison rtx occupies operand 1, so argument I maps to pattern
   operand I+adjust+1.  */
23537 for (i = 0; i < nargs; i++)
23539 tree arg = CALL_EXPR_ARG (exp, i);
23540 rtx op = expand_normal (arg);
23541 int adjust = (comparison_p) ? 1 : 0;
23542 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23544 if (last_arg_constant && i == nargs-1)
23546 if (GET_CODE (op) != CONST_INT)
23548 error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the
   diagnostic.  */
23549 return gen_reg_rtx (tmode);
23554 if (VECTOR_MODE_P (mode))
23555 op = safe_vector_operand (op, mode);
23557 /* If we aren't optimizing, only allow one memory operand to be
23559 if (memory_operand (op, mode))
23562 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23565 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23567 op = force_reg (mode, op);
23571 args[i].mode = mode;
/* Emit the pattern; the shape depends on arity and comparison-ness.  */
23577 pat = GEN_FCN (icode) (target, args[0].op);
23582 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23583 GEN_INT ((int)sub_code));
23584 else if (! comparison_p)
23585 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx from SUB_CODE and pass it
   as the pattern's second operand.  */
23588 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23592 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23597 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23601 gcc_unreachable ();
23611 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23612    insns with vec_merge. */
/* Expand a one-argument builtin EXP through pattern ICODE where the
   pattern is a scalar operation merged into a vector (the source
   vector is passed twice: as the operand and as the merge input).
   NOTE(review): elided view — return type, braces, and the final
   emit/return are not visible; op1 is presumably set to op0 in an
   elided line before the operand-2 predicate check.  */
23615 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23619 tree arg0 = CALL_EXPR_ARG (exp, 0);
23620 rtx op1, op0 = expand_normal (arg0);
23621 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23622 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23624 if (optimize || !target
23625 || GET_MODE (target) != tmode
23626 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23627 target = gen_reg_rtx (tmode);
23629 if (VECTOR_MODE_P (mode0))
23630 op0 = safe_vector_operand (op0, mode0);
/* Both source operands must satisfy their predicates; copy to a
   register otherwise.  */
23632 if ((optimize && !register_operand (op0, mode0))
23633 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23634 op0 = copy_to_mode_reg (mode0, op0);
23637 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23638 op1 = copy_to_mode_reg (mode0, op1)
23640 pat = GEN_FCN (icode) (target, op0, op1);
23647 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D (cmpps/cmppd family).
   The pattern takes both operands plus a comparison rtx built from
   D->comparison.  When SWAP, the operands are exchanged so that a
   comparison not directly encodable becomes its mirrored form.
   NOTE(review): elided view — return type, braces, `rtx pat, op2`
   declarations, the op0/op1 swap statements and the final emit/return
   are not visible.  */
23650 ix86_expand_sse_compare (const struct builtin_description *d,
23651 tree exp, rtx target, bool swap)
23654 tree arg0 = CALL_EXPR_ARG (exp, 0);
23655 tree arg1 = CALL_EXPR_ARG (exp, 1);
23656 rtx op0 = expand_normal (arg0);
23657 rtx op1 = expand_normal (arg1);
23659 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23660 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23661 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23662 enum rtx_code comparison = d->comparison;
23664 if (VECTOR_MODE_P (mode0))
23665 op0 = safe_vector_operand (op0, mode0);
23666 if (VECTOR_MODE_P (mode1))
23667 op1 = safe_vector_operand (op1, mode1);
23669 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 into a fresh register before swapping so the original
   operand rtx is not clobbered.  */
23673 rtx tmp = gen_reg_rtx (mode1);
23674 emit_move_insn (tmp, op1);
23679 if (optimize || !target
23680 || GET_MODE (target) != tmode
23681 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23682 target = gen_reg_rtx (tmode);
23684 if ((optimize && !register_operand (op0, mode0))
23685 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23686 op0 = copy_to_mode_reg (mode0, op0);
23687 if ((optimize && !register_operand (op1, mode1))
23688 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23689 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx is the pattern's third operand and selects the
   immediate predicate encoded into the cmp instruction.  */
23691 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23692 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23699 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: emit the compare insn,
   then materialize D->comparison of the resulting flags into the low
   byte of a fresh SImode register (zero-extended by the initial
   const0 store) and return that SImode register.
   NOTE(review): elided view — return type, braces, the operand-swap
   body, and parts of the final SET are not visible.  */
23702 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23706 tree arg0 = CALL_EXPR_ARG (exp, 0);
23707 tree arg1 = CALL_EXPR_ARG (exp, 1);
23708 rtx op0 = expand_normal (arg0);
23709 rtx op1 = expand_normal (arg1);
23710 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23711 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23712 enum rtx_code comparison = d->comparison;
23714 if (VECTOR_MODE_P (mode0))
23715 op0 = safe_vector_operand (op0, mode0);
23716 if (VECTOR_MODE_P (mode1))
23717 op1 = safe_vector_operand (op1, mode1);
23719 /* Swap operands if we have a comparison that isn't available in
23721 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first so writing only the QImode
   subreg below yields a properly zero-extended 0/1 value.  */
23728 target = gen_reg_rtx (SImode);
23729 emit_move_insn (target, const0_rtx);
23730 target = gen_rtx_SUBREG (QImode, target, 0);
23732 if ((optimize && !register_operand (op0, mode0))
23733 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23734 op0 = copy_to_mode_reg (mode0, op0);
23735 if ((optimize && !register_operand (op1, mode1))
23736 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23737 op1 = copy_to_mode_reg (mode1, op1);
23739 pat = GEN_FCN (d->icode) (op0, op1);
/* STRICT_LOW_PART writes just the low byte, set from the comparison
   of the flags produced by the comi insn.  */
23743 emit_insn (gen_rtx_SET (VOIDmode,
23744 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23745 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the containing SImode pseudo, not the QImode subreg.  */
23749 return SUBREG_REG (target);
23752 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest-style builtin described by D: emit the test insn,
   then set the low byte of a zeroed SImode register from
   D->comparison of the flags, returning the SImode register.
   Structure mirrors ix86_expand_sse_comi above.
   NOTE(review): elided view — return type, braces and parts of the
   final SET are not visible.  */
23755 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23759 tree arg0 = CALL_EXPR_ARG (exp, 0);
23760 tree arg1 = CALL_EXPR_ARG (exp, 1);
23761 rtx op0 = expand_normal (arg0);
23762 rtx op1 = expand_normal (arg1);
23763 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23764 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23765 enum rtx_code comparison = d->comparison;
23767 if (VECTOR_MODE_P (mode0))
23768 op0 = safe_vector_operand (op0, mode0);
23769 if (VECTOR_MODE_P (mode1))
23770 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result so the QImode subreg write below leaves a
   zero-extended 0/1 value.  */
23772 target = gen_reg_rtx (SImode);
23773 emit_move_insn (target, const0_rtx);
23774 target = gen_rtx_SUBREG (QImode, target, 0);
23776 if ((optimize && !register_operand (op0, mode0))
23777 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23778 op0 = copy_to_mode_reg (mode0, op0);
23779 if ((optimize && !register_operand (op1, mode1))
23780 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23781 op1 = copy_to_mode_reg (mode1, op1);
23783 pat = GEN_FCN (d->icode) (op0, op1);
23787 emit_insn (gen_rtx_SET (VOIDmode,
23788 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23789 gen_rtx_fmt_ee (comparison, QImode,
/* Return the containing SImode pseudo, not the QImode subreg.  */
23793 return SUBREG_REG (target);
23796 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands an SSE4.2 PCMPESTRI/PCMPESTRM builtin.  These patterns have
   two outputs (index and mask); depending on D->code one of them becomes
   the user-visible TARGET and the other goes to a scratch register.  For
   the flag-testing variants (the final else arm) both outputs are
   scratch and a QImode SETcc on the flags register (mode taken from
   D->flag) is returned instead.  NOTE(review): extraction gaps hide the
   return type, braces, the swap/return-0 paths after the error call,
   and the emit of PAT.  */
23799 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23800 			  tree exp, rtx target)
23803   tree arg0 = CALL_EXPR_ARG (exp, 0);
23804   tree arg1 = CALL_EXPR_ARG (exp, 1);
23805   tree arg2 = CALL_EXPR_ARG (exp, 2);
23806   tree arg3 = CALL_EXPR_ARG (exp, 3);
23807   tree arg4 = CALL_EXPR_ARG (exp, 4);
23808   rtx scratch0, scratch1;
23809   rtx op0 = expand_normal (arg0);
23810   rtx op1 = expand_normal (arg1);
23811   rtx op2 = expand_normal (arg2);
23812   rtx op3 = expand_normal (arg3);
23813   rtx op4 = expand_normal (arg4);
23814   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes of the pattern: 0/1 are the two outputs, 2-5 the data
   and length inputs, 6 the immediate control byte.  */
23816   tmode0 = insn_data[d->icode].operand[0].mode;
23817   tmode1 = insn_data[d->icode].operand[1].mode;
23818   modev2 = insn_data[d->icode].operand[2].mode;
23819   modei3 = insn_data[d->icode].operand[3].mode;
23820   modev4 = insn_data[d->icode].operand[4].mode;
23821   modei5 = insn_data[d->icode].operand[5].mode;
23822   modeimm = insn_data[d->icode].operand[6].mode;
23824   if (VECTOR_MODE_P (modev2))
23825     op0 = safe_vector_operand (op0, modev2);
23826   if (VECTOR_MODE_P (modev4))
23827     op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
23829   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23830     op0 = copy_to_mode_reg (modev2, op0);
23831   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23832     op1 = copy_to_mode_reg (modei3, op1);
23833   if ((optimize && !register_operand (op2, modev4))
23834       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23835     op2 = copy_to_mode_reg (modev4, op2);
23836   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23837     op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an 8-bit immediate; diagnose otherwise.  */
23839   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23841       error ("the fifth argument must be a 8-bit immediate");
23845   if (d->code == IX86_BUILTIN_PCMPESTRI128)
/* PCMPESTRI: the index output (operand 0) is the result.  */
23847       if (optimize || !target
23848 	  || GET_MODE (target) != tmode0
23849 	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23850 	target = gen_reg_rtx (tmode0);
23852       scratch1 = gen_reg_rtx (tmode1);
23854       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23856   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
/* PCMPESTRM: the mask output (operand 1) is the result.  */
23858       if (optimize || !target
23859 	  || GET_MODE (target) != tmode1
23860 	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23861 	target = gen_reg_rtx (tmode1);
23863       scratch0 = gen_reg_rtx (tmode0);
23865       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variants: both pattern outputs are scratch; D->flag
   must identify the flags-register mode to test (asserted non-zero).  */
23869       gcc_assert (d->flag);
23871       scratch0 = gen_reg_rtx (tmode0);
23872       scratch1 = gen_reg_rtx (tmode1);
23874       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the tested flag as 0/1 in a zero-extended SImode pseudo.  */
23884   target = gen_reg_rtx (SImode);
23885   emit_move_insn (target, const0_rtx);
23886   target = gen_rtx_SUBREG (QImode, target, 0);
23889     (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23890 		  gen_rtx_fmt_ee (EQ, QImode,
23891 				  gen_rtx_REG ((enum machine_mode) d->flag,
23894   return SUBREG_REG (target);
23901 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands an SSE4.2 PCMPISTRI/PCMPISTRM builtin — the implicit-length
   sibling of ix86_expand_sse_pcmpestr above: only two data operands and
   one immediate, no explicit lengths.  Structure mirrors the pcmpestr
   expander: pick which pattern output is the user result, or (final arm)
   test a flag from the flags register named by D->flag.  NOTE(review):
   extraction gaps hide the return type, braces, and the emit/return-0
   paths.  */
23904 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23905 			  tree exp, rtx target)
23908   tree arg0 = CALL_EXPR_ARG (exp, 0);
23909   tree arg1 = CALL_EXPR_ARG (exp, 1);
23910   tree arg2 = CALL_EXPR_ARG (exp, 2);
23911   rtx scratch0, scratch1;
23912   rtx op0 = expand_normal (arg0);
23913   rtx op1 = expand_normal (arg1);
23914   rtx op2 = expand_normal (arg2);
23915   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: 0/1 outputs, 2/3 vector inputs, 4 immediate.  */
23917   tmode0 = insn_data[d->icode].operand[0].mode;
23918   tmode1 = insn_data[d->icode].operand[1].mode;
23919   modev2 = insn_data[d->icode].operand[2].mode;
23920   modev3 = insn_data[d->icode].operand[3].mode;
23921   modeimm = insn_data[d->icode].operand[4].mode;
23923   if (VECTOR_MODE_P (modev2))
23924     op0 = safe_vector_operand (op0, modev2);
23925   if (VECTOR_MODE_P (modev3))
23926     op1 = safe_vector_operand (op1, modev3);
23928   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23929     op0 = copy_to_mode_reg (modev2, op0);
23930   if ((optimize && !register_operand (op1, modev3))
23931       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23932     op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate; diagnose otherwise.  */
23934   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23936       error ("the third argument must be a 8-bit immediate");
23940   if (d->code == IX86_BUILTIN_PCMPISTRI128)
/* PCMPISTRI: index output (operand 0) is the result.  */
23942       if (optimize || !target
23943 	  || GET_MODE (target) != tmode0
23944 	  || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23945 	target = gen_reg_rtx (tmode0);
23947       scratch1 = gen_reg_rtx (tmode1);
23949       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23951   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
/* PCMPISTRM: mask output (operand 1) is the result.  */
23953       if (optimize || !target
23954 	  || GET_MODE (target) != tmode1
23955 	  || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23956 	target = gen_reg_rtx (tmode1);
23958       scratch0 = gen_reg_rtx (tmode0);
23960       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: both outputs scratch, D->flag names the flags
   mode (asserted non-zero).  */
23964       gcc_assert (d->flag);
23966       scratch0 = gen_reg_rtx (tmode0);
23967       scratch1 = gen_reg_rtx (tmode1);
23969       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Capture the tested flag as 0/1 in a zero-extended SImode pseudo.  */
23979   target = gen_reg_rtx (SImode);
23980   emit_move_insn (target, const0_rtx);
23981   target = gen_rtx_SUBREG (QImode, target, 0);
23984     (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23985 		  gen_rtx_fmt_ee (EQ, QImode,
23986 				  gen_rtx_REG ((enum machine_mode) d->flag,
23989   return SUBREG_REG (target);
23995 /* Subroutine of ix86_expand_builtin to take care of insns with
23996    variable number of operands. */
/* Generic expander for builtins classified by ix86_builtin_type.  The
   big switch on D->flag decides the argument count, whether the last
   argument(s) must be constant (nargs_constant), whether the last
   argument is a shift count (last_arg_count), whether the result mode
   RMODE differs from the pattern's TMODE, and whether to dispatch to a
   specialized expander (ptest/compare/unop/binop/vec-merge).  The loop
   then legitimizes each argument against the pattern predicates and
   finally calls GEN_FCN with 1-4 operands.  NOTE(review): the extracted
   source is decimated — the return type, braces, the args[] declaration,
   many "nargs = N" assignments, break statements, the switch(nargs)
   header, and the final emit/return are not visible here, so the
   commentary below is limited to visible lines.  */
23999 ix86_expand_args_builtin (const struct builtin_description *d,
24000 			  tree exp, rtx target)
24002   rtx pat, real_target;
24003   unsigned int i, nargs;
24004   unsigned int nargs_constant = 0;
24005   int num_memory = 0;
24009   enum machine_mode mode;
24011   bool last_arg_count = false;
24012   enum insn_code icode = d->icode;
24013   const struct insn_data *insn_p = &insn_data[icode];
24014   enum machine_mode tmode = insn_p->operand[0].mode;
24015   enum machine_mode rmode = VOIDmode;
24017   enum rtx_code comparison = d->comparison;
/* Classify the builtin by its function-type tag.  */
24019   switch ((enum ix86_builtin_type) d->flag)
/* PTEST-style: delegate entirely.  */
24021     case INT_FTYPE_V8SF_V8SF_PTEST:
24022     case INT_FTYPE_V4DI_V4DI_PTEST:
24023     case INT_FTYPE_V4DF_V4DF_PTEST:
24024     case INT_FTYPE_V4SF_V4SF_PTEST:
24025     case INT_FTYPE_V2DI_V2DI_PTEST:
24026     case INT_FTYPE_V2DF_V2DF_PTEST:
24027       return ix86_expand_sse_ptest (d, exp, target);
/* Unary operations (one input, one output).  */
24028     case FLOAT128_FTYPE_FLOAT128:
24029     case FLOAT_FTYPE_FLOAT:
24030     case INT64_FTYPE_V4SF:
24031     case INT64_FTYPE_V2DF:
24032     case INT_FTYPE_V16QI:
24033     case INT_FTYPE_V8QI:
24034     case INT_FTYPE_V8SF:
24035     case INT_FTYPE_V4DF:
24036     case INT_FTYPE_V4SF:
24037     case INT_FTYPE_V2DF:
24038     case V16QI_FTYPE_V16QI:
24039     case V8SI_FTYPE_V8SF:
24040     case V8SI_FTYPE_V4SI:
24041     case V8HI_FTYPE_V8HI:
24042     case V8HI_FTYPE_V16QI:
24043     case V8QI_FTYPE_V8QI:
24044     case V8SF_FTYPE_V8SF:
24045     case V8SF_FTYPE_V8SI:
24046     case V8SF_FTYPE_V4SF:
24047     case V4SI_FTYPE_V4SI:
24048     case V4SI_FTYPE_V16QI:
24049     case V4SI_FTYPE_V4SF:
24050     case V4SI_FTYPE_V8SI:
24051     case V4SI_FTYPE_V8HI:
24052     case V4SI_FTYPE_V4DF:
24053     case V4SI_FTYPE_V2DF:
24054     case V4HI_FTYPE_V4HI:
24055     case V4DF_FTYPE_V4DF:
24056     case V4DF_FTYPE_V4SI:
24057     case V4DF_FTYPE_V4SF:
24058     case V4DF_FTYPE_V2DF:
24059     case V4SF_FTYPE_V4SF:
24060     case V4SF_FTYPE_V4SI:
24061     case V4SF_FTYPE_V8SF:
24062     case V4SF_FTYPE_V4DF:
24063     case V4SF_FTYPE_V2DF:
24064     case V2DI_FTYPE_V2DI:
24065     case V2DI_FTYPE_V16QI:
24066     case V2DI_FTYPE_V8HI:
24067     case V2DI_FTYPE_V4SI:
24068     case V2DF_FTYPE_V2DF:
24069     case V2DF_FTYPE_V4SI:
24070     case V2DF_FTYPE_V4DF:
24071     case V2DF_FTYPE_V4SF:
24072     case V2DF_FTYPE_V2SI:
24073     case V2SI_FTYPE_V2SI:
24074     case V2SI_FTYPE_V4SF:
24075     case V2SI_FTYPE_V2SF:
24076     case V2SI_FTYPE_V2DF:
24077     case V2SF_FTYPE_V2SF:
24078     case V2SF_FTYPE_V2SI:
/* Scalar ops that merge into an unmodified upper part: delegate.  */
24081     case V4SF_FTYPE_V4SF_VEC_MERGE:
24082     case V2DF_FTYPE_V2DF_VEC_MERGE:
24083       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary operations; comparisons fall through to the SWAP cases.  */
24084     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24085     case V16QI_FTYPE_V16QI_V16QI:
24086     case V16QI_FTYPE_V8HI_V8HI:
24087     case V8QI_FTYPE_V8QI_V8QI:
24088     case V8QI_FTYPE_V4HI_V4HI:
24089     case V8HI_FTYPE_V8HI_V8HI:
24090     case V8HI_FTYPE_V16QI_V16QI:
24091     case V8HI_FTYPE_V4SI_V4SI:
24092     case V8SF_FTYPE_V8SF_V8SF:
24093     case V8SF_FTYPE_V8SF_V8SI:
24094     case V4SI_FTYPE_V4SI_V4SI:
24095     case V4SI_FTYPE_V8HI_V8HI:
24096     case V4SI_FTYPE_V4SF_V4SF:
24097     case V4SI_FTYPE_V2DF_V2DF:
24098     case V4HI_FTYPE_V4HI_V4HI:
24099     case V4HI_FTYPE_V8QI_V8QI:
24100     case V4HI_FTYPE_V2SI_V2SI:
24101     case V4DF_FTYPE_V4DF_V4DF:
24102     case V4DF_FTYPE_V4DF_V4DI:
24103     case V4SF_FTYPE_V4SF_V4SF:
24104     case V4SF_FTYPE_V4SF_V4SI:
24105     case V4SF_FTYPE_V4SF_V2SI:
24106     case V4SF_FTYPE_V4SF_V2DF:
24107     case V4SF_FTYPE_V4SF_DI:
24108     case V4SF_FTYPE_V4SF_SI:
24109     case V2DI_FTYPE_V2DI_V2DI:
24110     case V2DI_FTYPE_V16QI_V16QI:
24111     case V2DI_FTYPE_V4SI_V4SI:
24112     case V2DI_FTYPE_V2DI_V16QI:
24113     case V2DI_FTYPE_V2DF_V2DF:
24114     case V2SI_FTYPE_V2SI_V2SI:
24115     case V2SI_FTYPE_V4HI_V4HI:
24116     case V2SI_FTYPE_V2SF_V2SF:
24117     case V2DF_FTYPE_V2DF_V2DF:
24118     case V2DF_FTYPE_V2DF_V4SF:
24119     case V2DF_FTYPE_V2DF_V2DI:
24120     case V2DF_FTYPE_V2DF_DI:
24121     case V2DF_FTYPE_V2DF_SI:
24122     case V2SF_FTYPE_V2SF_V2SF:
24123     case V1DI_FTYPE_V1DI_V1DI:
24124     case V1DI_FTYPE_V8QI_V8QI:
24125     case V1DI_FTYPE_V2SI_V2SI:
/* Plain binops delegate; a set comparison is handled by the common
   compare path after the switch.  */
24126       if (comparison == UNKNOWN)
24127 	return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons whose operands must be swapped before emission.  */
24130     case V4SF_FTYPE_V4SF_V4SF_SWAP:
24131     case V2DF_FTYPE_V2DF_V2DF_SWAP:
24132       gcc_assert (comparison != UNKNOWN);
/* SIMD shifts: last argument is a count, handled specially below.  */
24136     case V8HI_FTYPE_V8HI_V8HI_COUNT:
24137     case V8HI_FTYPE_V8HI_SI_COUNT:
24138     case V4SI_FTYPE_V4SI_V4SI_COUNT:
24139     case V4SI_FTYPE_V4SI_SI_COUNT:
24140     case V4HI_FTYPE_V4HI_V4HI_COUNT:
24141     case V4HI_FTYPE_V4HI_SI_COUNT:
24142     case V2DI_FTYPE_V2DI_V2DI_COUNT:
24143     case V2DI_FTYPE_V2DI_SI_COUNT:
24144     case V2SI_FTYPE_V2SI_V2SI_COUNT:
24145     case V2SI_FTYPE_V2SI_SI_COUNT:
24146     case V1DI_FTYPE_V1DI_V1DI_COUNT:
24147     case V1DI_FTYPE_V1DI_SI_COUNT:
24149       last_arg_count = true;
24151     case UINT64_FTYPE_UINT64_UINT64:
24152     case UINT_FTYPE_UINT_UINT:
24153     case UINT_FTYPE_UINT_USHORT:
24154     case UINT_FTYPE_UINT_UCHAR:
/* Two args where the result mode (TImode) differs from TMODE; the last
   argument must be constant.  */
24157     case V2DI2TI_FTYPE_V2DI_INT:
24160       nargs_constant = 1;
/* Two args, last must be an immediate.  */
24162     case V8HI_FTYPE_V8HI_INT:
24163     case V8SF_FTYPE_V8SF_INT:
24164     case V4SI_FTYPE_V4SI_INT:
24165     case V4SI_FTYPE_V8SI_INT:
24166     case V4HI_FTYPE_V4HI_INT:
24167     case V4DF_FTYPE_V4DF_INT:
24168     case V4SF_FTYPE_V4SF_INT:
24169     case V4SF_FTYPE_V8SF_INT:
24170     case V2DI_FTYPE_V2DI_INT:
24171     case V2DF_FTYPE_V2DF_INT:
24172     case V2DF_FTYPE_V4DF_INT:
24174       nargs_constant = 1;
/* Three-operand forms (e.g. blends with a vector selector).  */
24176     case V16QI_FTYPE_V16QI_V16QI_V16QI:
24177     case V8SF_FTYPE_V8SF_V8SF_V8SF:
24178     case V4DF_FTYPE_V4DF_V4DF_V4DF:
24179     case V4SF_FTYPE_V4SF_V4SF_V4SF:
24180     case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three args, last must be an immediate.  */
24183     case V16QI_FTYPE_V16QI_V16QI_INT:
24184     case V8HI_FTYPE_V8HI_V8HI_INT:
24185     case V8SI_FTYPE_V8SI_V8SI_INT:
24186     case V8SI_FTYPE_V8SI_V4SI_INT:
24187     case V8SF_FTYPE_V8SF_V8SF_INT:
24188     case V8SF_FTYPE_V8SF_V4SF_INT:
24189     case V4SI_FTYPE_V4SI_V4SI_INT:
24190     case V4DF_FTYPE_V4DF_V4DF_INT:
24191     case V4DF_FTYPE_V4DF_V2DF_INT:
24192     case V4SF_FTYPE_V4SF_V4SF_INT:
24193     case V2DI_FTYPE_V2DI_V2DI_INT:
24194     case V2DF_FTYPE_V2DF_V2DF_INT:
24196       nargs_constant = 1;
24198     case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24201       nargs_constant = 1;
24203     case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24206       nargs_constant = 1;
/* Forms whose trailing two arguments must both be immediates.  */
24208     case V2DI_FTYPE_V2DI_UINT_UINT:
24210       nargs_constant = 2;
24212     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24214       nargs_constant = 2;
24217       gcc_unreachable ();
24220   gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins are two-operand and have their own expander.  */
24222   if (comparison != UNKNOWN)
24224       gcc_assert (nargs == 2);
24225       return ix86_expand_sse_compare (d, exp, target, swap);
/* Pick/create the target.  When RMODE differs from TMODE, allocate in
   RMODE and view it as TMODE through a subreg for the pattern.  */
24228   if (rmode == VOIDmode || rmode == tmode)
24232 	  || GET_MODE (target) != tmode
24233 	  || ! (*insn_p->operand[0].predicate) (target, tmode))
24234 	target = gen_reg_rtx (tmode);
24235       real_target = target;
24239       target = gen_reg_rtx (rmode);
24240       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each call argument.  */
24243   for (i = 0; i < nargs; i++)
24245       tree arg = CALL_EXPR_ARG (exp, i);
24246       rtx op = expand_normal (arg);
24247       enum machine_mode mode = insn_p->operand[i + 1].mode;
24248       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24250       if (last_arg_count && (i + 1) == nargs)
24252 	  /* SIMD shift insns take either an 8-bit immediate or
24253 	     register as count.  But builtin functions take int as
24254 	     count.  If count doesn't match, we put it in register.  */
24257 	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24258 	      if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24259 		op = copy_to_reg (op);
/* Trailing constant arguments: if the predicate rejected the operand,
   emit the appropriate "must be an N-bit immediate" diagnostic based on
   which pattern this is.  */
24262       else if ((nargs - i) <= nargs_constant)
24267 	      case CODE_FOR_sse4_1_roundpd:
24268 	      case CODE_FOR_sse4_1_roundps:
24269 	      case CODE_FOR_sse4_1_roundsd:
24270 	      case CODE_FOR_sse4_1_roundss:
24271 	      case CODE_FOR_sse4_1_blendps:
24272 	      case CODE_FOR_avx_blendpd256:
24273 	      case CODE_FOR_avx_vpermilv4df:
24274 	      case CODE_FOR_avx_roundpd256:
24275 	      case CODE_FOR_avx_roundps256:
24276 		error ("the last argument must be a 4-bit immediate");
24279 	      case CODE_FOR_sse4_1_blendpd:
24280 	      case CODE_FOR_avx_vpermilv2df:
24281 		error ("the last argument must be a 2-bit immediate");
24284 	      case CODE_FOR_avx_vextractf128v4df:
24285 	      case CODE_FOR_avx_vextractf128v8sf:
24286 	      case CODE_FOR_avx_vextractf128v8si:
24287 	      case CODE_FOR_avx_vinsertf128v4df:
24288 	      case CODE_FOR_avx_vinsertf128v8sf:
24289 	      case CODE_FOR_avx_vinsertf128v8si:
24290 		error ("the last argument must be a 1-bit immediate");
24293 	      case CODE_FOR_avx_cmpsdv2df3:
24294 	      case CODE_FOR_avx_cmpssv4sf3:
24295 	      case CODE_FOR_avx_cmppdv2df3:
24296 	      case CODE_FOR_avx_cmppsv4sf3:
24297 	      case CODE_FOR_avx_cmppdv4df3:
24298 	      case CODE_FOR_avx_cmppsv8sf3:
24299 		error ("the last argument must be a 5-bit immediate");
/* Default: 8-bit immediate; phrase the message by position.  */
24303 		switch (nargs_constant)
24306 		    if ((nargs - i) == nargs_constant)
24308 			error ("the next to last argument must be an 8-bit immediate");
24312 		    error ("the last argument must be an 8-bit immediate");
24315 		    gcc_unreachable ();
/* Ordinary operand: legitimize against the predicate, limiting memory
   operands when optimizing.  */
24322 	  if (VECTOR_MODE_P (mode))
24323 	    op = safe_vector_operand (op, mode);
24325 	  /* If we aren't optimizing, only allow one memory operand to
24327 	  if (memory_operand (op, mode))
24330 	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24332 	      if (optimize || !match || num_memory > 1)
24333 		op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force to a register and reinterpret via subreg.  */
24337 	      op = copy_to_reg (op);
24338 	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24343       args[i].mode = mode;
/* Emit the pattern with however many operands were collected.  */
24349       pat = GEN_FCN (icode) (real_target, args[0].op);
24352       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24355       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24359       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24360 			     args[2].op, args[3].op);
24363       gcc_unreachable ();
24373 /* Subroutine of ix86_expand_builtin to take care of special insns
24374    with variable number of operands. */
/* Expander for load/store-flavored builtins (classified by
   ix86_special_builtin_type).  KLASS distinguishes loads (result in
   TARGET, memory among the inputs) from stores (TARGET is the memory
   destination built from the first argument; the function returns 0 for
   stores — see the final return).  MEMORY is the index of the argument
   that must be a memory operand, or ARRAY_SIZE (args) when the memory
   operand is the target itself.  NOTE(review): extraction is decimated —
   the return type, braces, several "klass = load/store", "nargs = N",
   and "memory = N" assignments, and the final emit are not visible.  */
24377 ix86_expand_special_args_builtin (const struct builtin_description *d,
24378 				  tree exp, rtx target)
24382   unsigned int i, nargs, arg_adjust, memory;
24386   enum machine_mode mode;
24388   enum insn_code icode = d->icode;
24389   bool last_arg_constant = false;
24390   const struct insn_data *insn_p = &insn_data[icode];
24391   enum machine_mode tmode = insn_p->operand[0].mode;
24392   enum { load, store } klass;
24394   switch ((enum ix86_special_builtin_type) d->flag)
/* Zero-operand side-effect insns (e.g. fences) emit directly.  */
24396     case VOID_FTYPE_VOID:
24397       emit_insn (GEN_FCN (icode) (target));
/* One-pointer loads.  */
24399     case V2DI_FTYPE_PV2DI:
24400     case V32QI_FTYPE_PCCHAR:
24401     case V16QI_FTYPE_PCCHAR:
24402     case V8SF_FTYPE_PCV4SF:
24403     case V8SF_FTYPE_PCFLOAT:
24404     case V4SF_FTYPE_PCFLOAT:
24405     case V4DF_FTYPE_PCV2DF:
24406     case V4DF_FTYPE_PCDOUBLE:
24407     case V2DF_FTYPE_PCDOUBLE:
/* Pointer + value stores: the memory operand is the target slot.  */
24412     case VOID_FTYPE_PV2SF_V4SF:
24413     case VOID_FTYPE_PV4DI_V4DI:
24414     case VOID_FTYPE_PV2DI_V2DI:
24415     case VOID_FTYPE_PCHAR_V32QI:
24416     case VOID_FTYPE_PCHAR_V16QI:
24417     case VOID_FTYPE_PFLOAT_V8SF:
24418     case VOID_FTYPE_PFLOAT_V4SF:
24419     case VOID_FTYPE_PDOUBLE_V4DF:
24420     case VOID_FTYPE_PDOUBLE_V2DF:
24421     case VOID_FTYPE_PDI_DI:
24422     case VOID_FTYPE_PINT_INT:
24425       /* Reserve memory operand for target.  */
24426       memory = ARRAY_SIZE (args);
/* Value + pointer loads (partial loads merging into a register).  */
24428     case V4SF_FTYPE_V4SF_PCV2SF:
24429     case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads: pointer + mask vector.  */
24434     case V8SF_FTYPE_PCV8SF_V8SF:
24435     case V4DF_FTYPE_PCV4DF_V4DF:
24436     case V4SF_FTYPE_PCV4SF_V4SF:
24437     case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: pointer + mask + value.  */
24442     case VOID_FTYPE_PV8SF_V8SF_V8SF:
24443     case VOID_FTYPE_PV4DF_V4DF_V4DF:
24444     case VOID_FTYPE_PV4SF_V4SF_V4SF:
24445     case VOID_FTYPE_PV2DF_V2DF_V2DF:
24448       /* Reserve memory operand for target.  */
24449       memory = ARRAY_SIZE (args);
24452       gcc_unreachable ();
24455   gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, build the MEM destination from the first (pointer)
   argument; remaining args are shifted by arg_adjust.  */
24457   if (klass == store)
24459       arg = CALL_EXPR_ARG (exp, 0);
24460       op = expand_normal (arg);
24461       gcc_assert (target == 0);
24462       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* For loads, reuse TARGET when suitable, else allocate a pseudo.  */
24470 	  || GET_MODE (target) != tmode
24471 	  || ! (*insn_p->operand[0].predicate) (target, tmode))
24472 	target = gen_reg_rtx (tmode);
24475   for (i = 0; i < nargs; i++)
24477       enum machine_mode mode = insn_p->operand[i + 1].mode;
24480       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24481       op = expand_normal (arg);
24482       match = (*insn_p->operand[i + 1].predicate) (op, mode);
24484       if (last_arg_constant && (i + 1) == nargs)
24490 	      error ("the last argument must be an 8-bit immediate");
24498 	      /* This must be the memory operand.  */
24499 	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24500 	      gcc_assert (GET_MODE (op) == mode
24501 			  || GET_MODE (op) == VOIDmode);
24505 	      /* This must be register.  */
24506 	      if (VECTOR_MODE_P (mode))
24507 		op = safe_vector_operand (op, mode);
24509 	      gcc_assert (GET_MODE (op) == mode
24510 			  || GET_MODE (op) == VOIDmode);
24511 	      op = copy_to_mode_reg (mode, op);
24516       args[i].mode = mode;
24522       pat = GEN_FCN (icode) (target, args[0].op);
24525       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24528       gcc_unreachable ();
/* Stores have no value; loads return the (possibly new) TARGET.  */
24534   return klass == store ? 0 : target;
24537 /* Return the integer constant in ARG. Constrain it to be in the range
24538    of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the extracted source is decimated — the return type
   line, braces, and the return statements (presumably returning 0 after
   the error and ELT on success) are not visible here.  */
24541 get_element_number (tree vec_type, tree arg)
24543   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* host_integerp/tree_low_cst with pos=1 require a non-negative
   host-representable constant; the comma expression captures ELT only
   when the first test passed.  */
24545   if (!host_integerp (arg, 1)
24546       || (elt = tree_low_cst (arg, 1), elt > max))
24548       error ("selector must be an integer constant in the range 0..%wi", max);
24555 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24556    ix86_expand_vector_init.  We DO have language-level syntax for this, in
24557    the form of (type){ init-list }.  Except that since we can't place emms
24558    instructions from inside the compiler, we can't allow the use of MMX
24559    registers unless the user explicitly asks for it.  So we do *not* define
24560    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
24561    we have builtins invoked by mmintrin.h that gives us license to emit
24562    these sorts of instructions. */
/* Expands a vec_init builtin: gathers one scalar per vector element from
   the call arguments into a PARALLEL and hands it to
   ix86_expand_vector_init.  NOTE(review): decimated view — the return
   type, braces, and the final "return target;" are not visible.  */
24565 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24567   enum machine_mode tmode = TYPE_MODE (type);
24568   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24569   int i, n_elt = GET_MODE_NUNITS (tmode);
24570   rtvec v = rtvec_alloc (n_elt);
/* The builtin must supply exactly one argument per element.  */
24572   gcc_assert (VECTOR_MODE_P (tmode));
24573   gcc_assert (call_expr_nargs (exp) == n_elt);
24575   for (i = 0; i < n_elt; ++i)
24577       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24578       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24581   if (!target || !register_operand (target, tmode))
24582     target = gen_reg_rtx (tmode);
24584   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24588 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24589    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
24590    had a language-level syntax for referencing vector elements.  */
/* Expands a vec_ext builtin: extracts element ARG1 (a compile-time
   constant validated by get_element_number) from vector ARG0 into
   TARGET.  NOTE(review): decimated view — return type, braces, local
   declarations for arg0/arg1/op0/elt, and the final "return target;"
   are not visible.  */
24593 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24595   enum machine_mode tmode, mode0;
24600   arg0 = CALL_EXPR_ARG (exp, 0);
24601   arg1 = CALL_EXPR_ARG (exp, 1);
24603   op0 = expand_normal (arg0);
24604   elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
24606   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24607   mode0 = TYPE_MODE (TREE_TYPE (arg0));
24608   gcc_assert (VECTOR_MODE_P (mode0));
24610   op0 = force_reg (mode0, op0);
24612   if (optimize || !target || !register_operand (target, tmode))
24613     target = gen_reg_rtx (tmode);
24615   ix86_expand_vector_extract (true, target, op0, elt);
24620 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24621    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
24622    a language-level syntax for referencing vector elements.  */
/* Expands a vec_set builtin: returns a copy of vector ARG0 with element
   ARG2 (constant, range-checked) replaced by scalar ARG1.  The source
   operand is deliberately copied so it is never modified in place.
   NOTE(review): decimated view — return type, braces, the ELT
   declaration, and the final "return target;" are not visible.  */
24625 ix86_expand_vec_set_builtin (tree exp)
24627   enum machine_mode tmode, mode1;
24628   tree arg0, arg1, arg2;
24630   rtx op0, op1, target;
24632   arg0 = CALL_EXPR_ARG (exp, 0);
24633   arg1 = CALL_EXPR_ARG (exp, 1);
24634   arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
24636   tmode = TYPE_MODE (TREE_TYPE (arg0));
24637   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24638   gcc_assert (VECTOR_MODE_P (tmode));
24640   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24641   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24642   elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if expansion produced a
   different mode (unsignedp = true).  */
24644   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24645     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24647   op0 = force_reg (tmode, op0);
24648   op1 = force_reg (mode1, op1);
24650   /* OP0 is the source of these builtin functions and shouldn't be
24651      modified.  Create a copy, use it and return it as target.  */
24652   target = gen_reg_rtx (tmode);
24653   emit_move_insn (target, op0);
24654   ix86_expand_vector_set (true, target, op1, elt);
24659 /* Expand an expression EXP that calls a built-in function,
24660    with result going to TARGET if that's convenient
24661    (and in mode MODE if that's convenient).
24662    SUBTARGET may be used as the target for computing one of EXP's operands.
24663    IGNORE is nonzero if the value is to be ignored.  */
/* Top-level dispatcher: after an ISA-availability check, a switch
   handles irregular builtins directly, then the bdesc_* tables are
   scanned linearly and matching entries delegated to the specialized
   expanders above.  NOTE(review): decimated view — the return type,
   braces, the switch header on FCODE, several return/break statements,
   and parts of some case bodies (e.g. INFQ) are not visible here.  */
24666 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24667 		     enum machine_mode mode ATTRIBUTE_UNUSED,
24668 		     int ignore ATTRIBUTE_UNUSED)
24670   const struct builtin_description *d;
24672   enum insn_code icode;
24673   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24674   tree arg0, arg1, arg2;
24675   rtx op0, op1, op2, pat;
24676   enum machine_mode mode0, mode1, mode2;
24677   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24679   /* Determine whether the builtin function is available under the current ISA.
24680      Originally the builtin was not created if it wasn't applicable to the
24681      current ISA based on the command line switches.  With function specific
24682      options, we need to check in the context of the function making the call
24683      whether it is supported.  */
24684   if (ix86_builtins_isa[fcode].isa
24685       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24687       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24688 				       NULL, NULL, false);
/* Diagnose which -m option would enable the builtin.  */
24691 	error ("%qE needs unknown isa option", fndecl);
24694 	  gcc_assert (opts != NULL);
24695 	  error ("%qE needs isa option %s", fndecl, opts);
/* MASKMOVQ/MASKMOVDQU: call args are (value, mask, address) but the
   pattern's operands are (mem, value, mask) — hence the renumbering.  */
24703     case IX86_BUILTIN_MASKMOVQ:
24704     case IX86_BUILTIN_MASKMOVDQU:
24705       icode = (fcode == IX86_BUILTIN_MASKMOVQ
24706 	       ? CODE_FOR_mmx_maskmovq
24707 	       : CODE_FOR_sse2_maskmovdqu);
24708       /* Note the arg order is different from the operand order.  */
24709       arg1 = CALL_EXPR_ARG (exp, 0);
24710       arg2 = CALL_EXPR_ARG (exp, 1);
24711       arg0 = CALL_EXPR_ARG (exp, 2);
24712       op0 = expand_normal (arg0);
24713       op1 = expand_normal (arg1);
24714       op2 = expand_normal (arg2);
24715       mode0 = insn_data[icode].operand[0].mode;
24716       mode1 = insn_data[icode].operand[1].mode;
24717       mode2 = insn_data[icode].operand[2].mode;
24719       op0 = force_reg (Pmode, op0);
24720       op0 = gen_rtx_MEM (mode1, op0);
24722       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24723 	op0 = copy_to_mode_reg (mode0, op0);
24724       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24725 	op1 = copy_to_mode_reg (mode1, op1);
24726       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24727 	op2 = copy_to_mode_reg (mode2, op2);
24728       pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a dedicated stack slot.  */
24734     case IX86_BUILTIN_LDMXCSR:
24735       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24736       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24737       emit_move_insn (target, op0);
24738       emit_insn (gen_sse_ldmxcsr (target));
24741     case IX86_BUILTIN_STMXCSR:
24742       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24743       emit_insn (gen_sse_stmxcsr (target));
24744       return copy_to_mode_reg (SImode, target);
24746     case IX86_BUILTIN_CLFLUSH:
24747       arg0 = CALL_EXPR_ARG (exp, 0);
24748       op0 = expand_normal (arg0);
24749       icode = CODE_FOR_sse2_clflush;
24750       if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24751 	op0 = copy_to_mode_reg (Pmode, op0);
24753       emit_insn (gen_sse2_clflush (op0));
/* MONITOR: address + two SImode hint operands; uses ix86_gen_monitor so
   the address mode follows the target (32/64-bit).  */
24756     case IX86_BUILTIN_MONITOR:
24757       arg0 = CALL_EXPR_ARG (exp, 0);
24758       arg1 = CALL_EXPR_ARG (exp, 1);
24759       arg2 = CALL_EXPR_ARG (exp, 2);
24760       op0 = expand_normal (arg0);
24761       op1 = expand_normal (arg1);
24762       op2 = expand_normal (arg2);
24764 	op0 = copy_to_mode_reg (Pmode, op0);
24766 	op1 = copy_to_mode_reg (SImode, op1);
24768 	op2 = copy_to_mode_reg (SImode, op2);
24769       emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24772     case IX86_BUILTIN_MWAIT:
24773       arg0 = CALL_EXPR_ARG (exp, 0);
24774       arg1 = CALL_EXPR_ARG (exp, 1);
24775       op0 = expand_normal (arg0);
24776       op1 = expand_normal (arg1);
24778 	op0 = copy_to_mode_reg (SImode, op0);
24780 	op1 = copy_to_mode_reg (SImode, op1);
24781       emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set wrappers (see comments on their helpers).  */
24784     case IX86_BUILTIN_VEC_INIT_V2SI:
24785     case IX86_BUILTIN_VEC_INIT_V4HI:
24786     case IX86_BUILTIN_VEC_INIT_V8QI:
24787       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24789     case IX86_BUILTIN_VEC_EXT_V2DF:
24790     case IX86_BUILTIN_VEC_EXT_V2DI:
24791     case IX86_BUILTIN_VEC_EXT_V4SF:
24792     case IX86_BUILTIN_VEC_EXT_V4SI:
24793     case IX86_BUILTIN_VEC_EXT_V8HI:
24794     case IX86_BUILTIN_VEC_EXT_V2SI:
24795     case IX86_BUILTIN_VEC_EXT_V4HI:
24796     case IX86_BUILTIN_VEC_EXT_V16QI:
24797       return ix86_expand_vec_ext_builtin (exp, target);
24799     case IX86_BUILTIN_VEC_SET_V2DI:
24800     case IX86_BUILTIN_VEC_SET_V4SF:
24801     case IX86_BUILTIN_VEC_SET_V4SI:
24802     case IX86_BUILTIN_VEC_SET_V8HI:
24803     case IX86_BUILTIN_VEC_SET_V4HI:
24804     case IX86_BUILTIN_VEC_SET_V16QI:
24805       return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: load the float128 infinity from the constant pool.  */
24807     case IX86_BUILTIN_INFQ:
24809 	REAL_VALUE_TYPE inf;
24813 	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24815 	tmp = validize_mem (force_const_mem (mode, tmp));
24818 	  target = gen_reg_rtx (mode);
24820 	emit_move_insn (target, tmp);
/* Fall through: scan the description tables for FCODE.  */
24828   for (i = 0, d = bdesc_special_args;
24829        i < ARRAY_SIZE (bdesc_special_args);
24831     if (d->code == fcode)
24832       return ix86_expand_special_args_builtin (d, exp, target);
24834   for (i = 0, d = bdesc_args;
24835        i < ARRAY_SIZE (bdesc_args);
24837     if (d->code == fcode)
24840 	case IX86_BUILTIN_FABSQ:
24841 	case IX86_BUILTIN_COPYSIGNQ:
24843 	    /* Emit a normal call if SSE2 isn't available.  */
24844 	    return expand_call (exp, target, ignore);
24846 	  return ix86_expand_args_builtin (d, exp, target);
24849   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24850     if (d->code == fcode)
24851       return ix86_expand_sse_comi (d, exp, target);
24853   for (i = 0, d = bdesc_pcmpestr;
24854        i < ARRAY_SIZE (bdesc_pcmpestr);
24856     if (d->code == fcode)
24857       return ix86_expand_sse_pcmpestr (d, exp, target);
24859   for (i = 0, d = bdesc_pcmpistr;
24860        i < ARRAY_SIZE (bdesc_pcmpistr);
24862     if (d->code == fcode)
24863       return ix86_expand_sse_pcmpistr (d, exp, target);
24865   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24866     if (d->code == fcode)
24867       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24868 					    (enum multi_arg_type)d->flag,
/* Every valid FCODE must be matched above.  */
24871   gcc_unreachable ();
24874 /* Returns a function decl for a vectorized version of the builtin function
24875    with builtin function code FN and the result vector type TYPE, or NULL_TREE
24876    if it is not available. */
/* Maps a few scalar math builtins (sqrt/sqrtf/lrint/lrintf) onto SSE
   vector builtins when the in/out vector shapes match; otherwise falls
   back to a vectorization library handler if one is registered.
   NOTE(review): decimated view — the return type, braces, the switch
   header on FN, break statements, and the trailing NULL_TREE return are
   not visible here.  */
24879 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24882   enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are considered.  */
24885   if (TREE_CODE (type_out) != VECTOR_TYPE
24886       || TREE_CODE (type_in) != VECTOR_TYPE)
24889   out_mode = TYPE_MODE (TREE_TYPE (type_out));
24890   out_n = TYPE_VECTOR_SUBPARTS (type_out);
24891   in_mode = TYPE_MODE (TREE_TYPE (type_in));
24892   in_n = TYPE_VECTOR_SUBPARTS (type_in);
24896     case BUILT_IN_SQRT:
24897       if (out_mode == DFmode && out_n == 2
24898 	  && in_mode == DFmode && in_n == 2)
24899 	return ix86_builtins[IX86_BUILTIN_SQRTPD];
24902     case BUILT_IN_SQRTF:
24903       if (out_mode == SFmode && out_n == 4
24904 	  && in_mode == SFmode && in_n == 4)
24905 	return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24908     case BUILT_IN_LRINT:
24909       if (out_mode == SImode && out_n == 4
24910 	  && in_mode == DFmode && in_n == 2)
24911 	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24914     case BUILT_IN_LRINTF:
24915       if (out_mode == SImode && out_n == 4
24916 	  && in_mode == SFmode && in_n == 4)
24917 	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24924   /* Dispatch to a handler for a vectorization library.  */
24925   if (ix86_veclib_handler)
24926     return (*ix86_veclib_handler)(fn, type_out, type_in);
24931 /* Handler for an SVML-style interface to
24932    a library with vectorized intrinsics.  */
/* Builds a FUNCTION_DECL for the Intel SVML vector version of scalar
   math builtin FN (e.g. "vmldExp2" for exp on v2df, "vmlsExpf4" on
   v4sf).  Returns early (NULL_TREE, per surrounding convention — the
   bare returns are hidden by extraction gaps) when -funsafe-math-
   optimizations is off, when modes/counts don't match, or for
   unsupported builtins.  NOTE(review): decimated view — the return
   type, braces, the switch header, the name-uppercasing loop body, the
   n_args counting, and the final return of NEW_FNDECL are not visible
   here.  */
24935 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24938   tree fntype, new_fndecl, args;
24941   enum machine_mode el_mode, in_mode;
24944   /* The SVML is suitable for unsafe math only.  */
24945   if (!flag_unsafe_math_optimizations)
24948   el_mode = TYPE_MODE (TREE_TYPE (type_out));
24949   n = TYPE_VECTOR_SUBPARTS (type_out);
24950   in_mode = TYPE_MODE (TREE_TYPE (type_in));
24951   in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Require identical element mode (and, per the visible comparison,
   presumably matching subparts) between input and output.  */
24952   if (el_mode != in_mode
/* Double-precision entry points: only v2df supported.  */
24960     case BUILT_IN_LOG10:
24962     case BUILT_IN_TANH:
24964     case BUILT_IN_ATAN:
24965     case BUILT_IN_ATAN2:
24966     case BUILT_IN_ATANH:
24967     case BUILT_IN_CBRT:
24968     case BUILT_IN_SINH:
24970     case BUILT_IN_ASINH:
24971     case BUILT_IN_ASIN:
24972     case BUILT_IN_COSH:
24974     case BUILT_IN_ACOSH:
24975     case BUILT_IN_ACOS:
24976       if (el_mode != DFmode || n != 2)
/* Single-precision entry points: only v4sf supported.  */
24980     case BUILT_IN_EXPF:
24981     case BUILT_IN_LOGF:
24982     case BUILT_IN_LOG10F:
24983     case BUILT_IN_POWF:
24984     case BUILT_IN_TANHF:
24985     case BUILT_IN_TANF:
24986     case BUILT_IN_ATANF:
24987     case BUILT_IN_ATAN2F:
24988     case BUILT_IN_ATANHF:
24989     case BUILT_IN_CBRTF:
24990     case BUILT_IN_SINHF:
24991     case BUILT_IN_SINF:
24992     case BUILT_IN_ASINHF:
24993     case BUILT_IN_ASINF:
24994     case BUILT_IN_COSHF:
24995     case BUILT_IN_COSF:
24996     case BUILT_IN_ACOSHF:
24997     case BUILT_IN_ACOSF:
24998       if (el_mode != SFmode || n != 4)
/* Derive the SVML name from the scalar builtin's identifier; log needs
   special-casing because SVML spells it "Ln".  bname+10 skips the
   "__builtin_" prefix.  */
25006   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25008   if (fn == BUILT_IN_LOGF)
25009     strcpy (name, "vmlsLn4");
25010   else if (fn == BUILT_IN_LOG)
25011     strcpy (name, "vmldLn2");
25014       sprintf (name, "vmls%s", bname+10);
/* Single-precision names end in the vector width '4' replacing the
   trailing 'f' of the scalar name.  */
25015       name[strlen (name)-1] = '4';
25018     sprintf (name, "vmld%s2", bname+10);
25020   /* Convert to uppercase.  */
/* Count the scalar builtin's arguments to pick a 1- or 2-input vector
   function type.  */
25024   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25025        args = TREE_CHAIN (args))
25029     fntype = build_function_type_list (type_out, type_in, NULL);
25031     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25033   /* Build a function declaration for the vectorized function.  */
25034   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25035   TREE_PUBLIC (new_fndecl) = 1;
25036   DECL_EXTERNAL (new_fndecl) = 1;
/* NOVOPS + READONLY: the library routines neither read nor clobber
   user-visible memory, enabling better optimization around calls.  */
25037   DECL_IS_NOVOPS (new_fndecl) = 1;
25038   TREE_READONLY (new_fndecl) = 1;
25043 /* Handler for an ACML-style interface to
25044 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- interior lines (returns, braces, the
 64-bit target check at 25059, switch heads) are missing.  */
25047 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* "__vr.._": the two dots are patched below with the precision/lane
 infix; the suffix is appended at name + 7.  */
25049 char name[20] = "__vr.._";
25050 tree fntype, new_fndecl, args;
25053 enum machine_mode el_mode, in_mode;
25056 /* The ACML is 64bits only and suitable for unsafe math only as
25057 it does not correctly support parts of IEEE with the required
25058 precision such as denormals. */
25060 || !flag_unsafe_math_optimizations)
/* Element mode and lane count of result and argument vector types.  */
25063 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25064 n = TYPE_VECTOR_SUBPARTS (type_out);
25065 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25066 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25067 if (el_mode != in_mode
/* Double-precision cases (2 x DFmode expected).  */
25077 case BUILT_IN_LOG2:
25078 case BUILT_IN_LOG10:
25081 if (el_mode != DFmode
/* Single-precision cases (4 x SFmode expected).  */
25086 case BUILT_IN_SINF:
25087 case BUILT_IN_COSF:
25088 case BUILT_IN_EXPF:
25089 case BUILT_IN_POWF:
25090 case BUILT_IN_LOGF:
25091 case BUILT_IN_LOG2F:
25092 case BUILT_IN_LOG10F:
25095 if (el_mode != SFmode
/* Append the builtin's bare name (skipping "__builtin_", bname+10)
 after the "__vr.._" prefix.  */
25104 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25105 sprintf (name + 7, "%s", bname+10);
/* Count arguments (loop body elided) to pick a 1- or 2-arg fntype.  */
25108 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25109 args = TREE_CHAIN (args))
25113 fntype = build_function_type_list (type_out, type_in, NULL);
25115 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25117 /* Build a function declaration for the vectorized function. */
25118 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25119 TREE_PUBLIC (new_fndecl) = 1;
25120 DECL_EXTERNAL (new_fndecl) = 1;
25121 DECL_IS_NOVOPS (new_fndecl) = 1;
25122 TREE_READONLY (new_fndecl) = 1;
25128 /* Returns a decl of a function that implements conversion of an integer vector
25129 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25130 side of the conversion.
25131 Return NULL_TREE if it is not available. */
/* NOTE(review): elided listing -- the CODE switch head and several case
 labels/returns are missing between the lines shown.  */
25134 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25136 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction: V4SI -> V4SF via cvtdq2ps.  */
25142 switch (TYPE_MODE (type))
25145 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: V4SF -> V4SI via cvttps2dq.  */
25150 case FIX_TRUNC_EXPR:
25151 switch (TYPE_MODE (type))
25154 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25164 /* Returns a code for a target-specific builtin that implements
25165 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): elided listing -- the md_fn dispatch and switch heads
 are missing between the lines shown.  */
25168 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25169 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under the full relaxed-math
 flag set (finite, non-trapping, unsafe) with SSE math, and only when
 optimizing for speed.  */
25171 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25172 && flag_finite_math_only && !flag_trapping_math
25173 && flag_unsafe_math_optimizations))
25177 /* Machine dependent builtins. */
25180 /* Vectorized version of sqrt to rsqrt conversion. */
25181 case IX86_BUILTIN_SQRTPS_NR:
25182 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25188 /* Normal builtins. */
25191 /* Sqrt to rsqrt conversion. */
25192 case BUILT_IN_SQRTF:
25193 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25200 /* Store OPERAND to the memory after reload is completed. This means
25201 that we can't easily use assign_stack_local. */
/* NOTE(review): elided listing -- mode switch heads, push bodies and
 returns are missing between the lines shown.  */
25203 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25207 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
 adjusting it.  */
25208 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25210 result = gen_rtx_MEM (mode,
25211 gen_rtx_PLUS (Pmode,
25213 GEN_INT (-RED_ZONE_SIZE)));
25214 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value (widened to DImode).  */
25216 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25222 operand = gen_lowpart (DImode, operand);
25226 gen_rtx_SET (VOIDmode,
25227 gen_rtx_MEM (DImode,
25228 gen_rtx_PRE_DEC (DImode,
25229 stack_pointer_rtx)),
25233 gcc_unreachable ();
25235 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes (high word first,
 presumably -- the push ordering lines are elided).  */
25244 split_di (&operand, 1, operands, operands + 1);
25246 gen_rtx_SET (VOIDmode,
25247 gen_rtx_MEM (SImode,
25248 gen_rtx_PRE_DEC (Pmode,
25249 stack_pointer_rtx)),
25252 gen_rtx_SET (VOIDmode,
25253 gen_rtx_MEM (SImode,
25254 gen_rtx_PRE_DEC (Pmode,
25255 stack_pointer_rtx)),
25260 /* Store HImodes as SImodes. */
25261 operand = gen_lowpart (SImode, operand);
25265 gen_rtx_SET (VOIDmode,
25266 gen_rtx_MEM (GET_MODE (operand),
25267 gen_rtx_PRE_DEC (SImode,
25268 stack_pointer_rtx)),
25272 gcc_unreachable ();
/* Result is the freshly pushed stack slot.  */
25274 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25279 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the temporary slot off the stack.
 With a red zone nothing was pushed, so there is nothing to free
 (the size computation lines are elided in this listing).  */
25281 ix86_free_from_memory (enum machine_mode mode)
25283 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25287 if (mode == DImode || TARGET_64BIT)
25291 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25292 to pop or add instruction if registers are available. */
25293 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25294 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25299 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25300 QImode must go into class Q_REGS.
25301 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25302 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): elided listing -- several return statements between the
 conditions below are missing.  */
25304 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25306 enum machine_mode mode = GET_MODE (x);
25308 /* We're only allowed to return a subclass of CLASS. Many of the
25309 following checks fail for NO_REGS, so eliminate that early. */
25310 if (regclass == NO_REGS)
25313 /* All classes can load zeros. */
25314 if (x == CONST0_RTX (mode))
25317 /* Force constants into memory if we are loading a (nonzero) constant into
25318 an MMX or SSE register. This is because there are no MMX/SSE instructions
25319 to load from a constant. */
25321 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25324 /* Prefer SSE regs only, if we can use them for math. */
25325 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25326 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25328 /* Floating-point constants need more complex checks. */
25329 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25331 /* General regs can load everything. */
25332 if (reg_class_subset_p (regclass, GENERAL_REGS))
25335 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25336 zero above. We only want to wind up preferring 80387 registers if
25337 we plan on doing computation with them. */
25339 && standard_80387_constant_p (x))
/* Narrow mixed x87/SSE classes to their x87-only counterparts so the
 constant can be materialized by fld of a standard constant.  */
25341 /* Limit class to non-sse. */
25342 if (regclass == FLOAT_SSE_REGS)
25344 if (regclass == FP_TOP_SSE_REGS)
25346 if (regclass == FP_SECOND_SSE_REGS)
25347 return FP_SECOND_REG;
25348 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25355 /* Generally when we see PLUS here, it's the function invariant
25356 (plus soft-fp const_int). Which can only be computed into general
25358 if (GET_CODE (x) == PLUS)
25359 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25361 /* QImode constants are easy to load, but non-constant QImode data
25362 must go into Q_REGS. */
25363 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25365 if (reg_class_subset_p (regclass, Q_REGS))
25367 if (reg_class_subset_p (Q_REGS, regclass))
25375 /* Discourage putting floating-point values in SSE registers unless
25376 SSE math is being used, and likewise for the 387 registers. */
25378 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25380 enum machine_mode mode = GET_MODE (x);
25382 /* Restrict the output reload class to the register bank that we are doing
25383 math on. If we would like not to return a subset of CLASS, reject this
25384 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant -- MODE was already initialized to
 GET_MODE (x) at its declaration above.  */
25385 mode = GET_MODE (x);
25386 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25387 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 modes, strip the SSE part out of mixed classes (elided
 return for FP_TOP_SSE_REGS presumably yields FP_TOP_REG).  */
25389 if (X87_FLOAT_MODE_P (mode))
25391 if (regclass == FP_TOP_SSE_REGS)
25393 else if (regclass == FP_SECOND_SSE_REGS)
25394 return FP_SECOND_REG;
25396 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook TARGET_SECONDARY_RELOAD.  Decide whether a reload of X
 into/out of class RCLASS in MODE needs an intermediate register class.
 NOTE(review): elided listing -- the regno computation before line 25421
 and the final returns are missing.  */
25402 static enum reg_class
25403 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25404 enum machine_mode mode,
25405 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25407 /* QImode spills from non-QI registers require
25408 intermediate register on 32bit targets. */
25409 if (!in_p && mode == QImode && !TARGET_64BIT
25410 && (rclass == GENERAL_REGS
25411 || rclass == LEGACY_REGS
25412 || rclass == INDEX_REGS))
/* Resolve pseudos/subregs to a hard register number if possible.  */
25421 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25422 regno = true_regnum (x);
25424 /* Return Q_REGS if the operand is in memory. */
25432 /* If we are copying between general and FP registers, we need a memory
25433 location. The same is true for SSE and MMX registers.
25435 To optimize register_move_cost performance, allow inline variant.
25437 The macro can't work reliably when one of the CLASSES is class containing
25438 registers from multiple units (SSE, MMX, integer). We avoid this by never
25439 combining those units in single alternative in the machine description.
25440 Ensure that this constraint holds to avoid unexpected surprises.
25442 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25443 enforce these sanity checks. */
25446 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25447 enum machine_mode mode, int strict)
/* A "maybe" class that is not a pure class mixes units -- reject it
 (assert under STRICT, conservative answer otherwise).  */
25449 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25450 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25451 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25452 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25453 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25454 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25456 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
25460 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25463 /* ??? This is a lie. We do have moves between mmx/general, and for
25464 mmx/sse2. But by saying we need secondary memory we discourage the
25465 register allocator from using the mmx registers unless needed. */
25466 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25469 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25471 /* SSE1 doesn't have any direct moves from other classes. */
25475 /* If the target says that inter-unit moves are more expensive
25476 than moving through memory, then don't generate them. */
25477 if (!TARGET_INTER_UNIT_MOVES)
25480 /* Between SSE and general, we have moves no larger than word size. */
25481 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; this is the
 entry point used by the SECONDARY_MEMORY_NEEDED macro.  */
25489 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25490 enum machine_mode mode, int strict)
25492 return inline_secondary_memory_needed (class1, class2, mode, strict);
25495 /* Return true if the registers in CLASS cannot represent the change from
25496 modes FROM to TO. */
25499 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25500 enum reg_class regclass)
/* NOTE(review): elided listing -- the same-mode early-out and the final
 return are missing from this view.  */
25505 /* x87 registers can't do subreg at all, as all values are reformatted
25506 to extended precision. */
25507 if (MAYBE_FLOAT_CLASS_P (regclass))
25510 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25512 /* Vector registers do not support QI or HImode loads. If we don't
25513 disallow a change to these modes, reload will assume it's ok to
25514 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25515 the vec_dupv4hi pattern. */
25516 if (GET_MODE_SIZE (from) < 4)
25519 /* Vector registers do not support subreg with nonzero offsets, which
25520 are otherwise valid for integer registers. Since we can't see
25521 whether we have a nonzero offset from here, prohibit all
25522 nonparadoxical subregs changing size. */
25523 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25530 /* Return the cost of moving data of mode M between a
25531 register and memory. A value of 2 is the default; this cost is
25532 relative to those in `REGISTER_MOVE_COST'.
25534 This function is used extensively by register_move_cost that is used to
25535 build tables at startup. Make it inline in this case.
25536 When IN is 2, return maximum of in and out move cost.
25538 If moving between registers and memory is more expensive than
25539 between two registers, you should define this macro to express the
25542 Model also increased moving costs of QImode registers in non
25546 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): elided listing -- the `in' parameter, `index'/`cost'
 locals and the size->index switch bodies are missing from this view.  */
/* x87 register bank: cost from the fp_load/fp_store tables.  */
25550 if (FLOAT_CLASS_P (regclass))
25568 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25569 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE register bank: indexed by operand size (switch body elided).  */
25571 if (SSE_CLASS_P (regclass))
25574 switch (GET_MODE_SIZE (mode))
25589 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25590 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX register bank.  */
25592 if (MMX_CLASS_P (regclass))
25595 switch (GET_MODE_SIZE (mode))
25607 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25608 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by size.  QImode in a non-Q register on 32-bit
 needs movzbl on load to avoid partial-register stalls, hence the
 extra cost below.  */
25610 switch (GET_MODE_SIZE (mode))
25613 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25616 return ix86_cost->int_store[0];
25617 if (TARGET_PARTIAL_REG_DEPENDENCY
25618 && optimize_function_for_speed_p (cfun))
25619 cost = ix86_cost->movzbl_load;
25621 cost = ix86_cost->int_load[0];
25623 return MAX (cost, ix86_cost->int_store[0]);
25629 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25631 return ix86_cost->movzbl_load;
25633 return ix86_cost->int_store[0] + 4;
25638 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25639 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25641 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25642 if (mode == TFmode)
25645 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25647 cost = ix86_cost->int_load[2];
25649 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
25650 return (cost * (((int) GET_MODE_SIZE (mode)
25651 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line entry point for MEMORY_MOVE_COST; delegates to the
 inline variant above.  */
25656 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25658 return inline_memory_move_cost (mode, regclass, in);
25662 /* Return the cost of moving data from a register in class CLASS1 to
25663 one in class CLASS2.
25665 It is not required that the cost always equal 2 when FROM is the same as TO;
25666 on some machines it is expensive to move between registers if they are not
25667 general registers. */
25670 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25671 enum reg_class class2)
25673 /* In case we require secondary memory, compute cost of the store followed
25674 by load. In order to avoid bad register allocation choices, we need
25675 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* NOTE(review): the declaration/initialization of `cost' is elided from
 this listing.  IN == 2 asks for max(load, store) on each side.  */
25677 if (inline_secondary_memory_needed (class1, class2, mode, 0))
25681 cost += inline_memory_move_cost (mode, class1, 2);
25682 cost += inline_memory_move_cost (mode, class2, 2);
25684 /* In case of copying from general_purpose_register we may emit multiple
25685 stores followed by single load causing memory size mismatch stall.
25686 Count this as arbitrarily high cost of 20. */
25687 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25690 /* In the case of FP/MMX moves, the registers actually overlap, and we
25691 have to switch modes in order to treat them differently. */
25692 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25693 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25699 /* Moves between SSE/MMX and integer unit are expensive. */
25700 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25701 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25703 /* ??? By keeping returned value relatively high, we limit the number
25704 of moves between integer and MMX/SSE registers for all targets.
25705 Additionally, high value prevents problem with x86_modes_tieable_p(),
25706 where integer modes in MMX/SSE registers are not tieable
25707 because of missing QImode and HImode moves to, from or between
25708 MMX/SSE registers. */
25709 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-bank moves: per-bank costs from the tuning tables.  */
25711 if (MAYBE_FLOAT_CLASS_P (class1))
25712 return ix86_cost->fp_move;
25713 if (MAYBE_SSE_CLASS_P (class1))
25714 return ix86_cost->sse_move;
25715 if (MAYBE_MMX_CLASS_P (class1))
25716 return ix86_cost->mmx_move;
25720 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25723 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25725 /* Flags and only flags can only hold CCmode values. */
25726 if (CC_REGNO_P (regno))
25727 return GET_MODE_CLASS (mode) == MODE_CC;
25728 if (GET_MODE_CLASS (mode) == MODE_CC
25729 || GET_MODE_CLASS (mode) == MODE_RANDOM
25730 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25732 if (FP_REGNO_P (regno))
25733 return VALID_FP_MODE_P (mode);
25734 if (SSE_REGNO_P (regno))
25736 /* We implement the move patterns for all vector modes into and
25737 out of SSE registers, even when no operation instructions
25738 are available. OImode move is available only when AVX is
25740 return ((TARGET_AVX && mode == OImode)
25741 || VALID_AVX256_REG_MODE (mode)
25742 || VALID_SSE_REG_MODE (mode)
25743 || VALID_SSE2_REG_MODE (mode)
25744 || VALID_MMX_REG_MODE (mode)
25745 || VALID_MMX_REG_MODE_3DNOW (mode));
25747 if (MMX_REGNO_P (regno))
25749 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25750 so if the register is available at all, then we can move data of
25751 the given mode into or out of it. */
25752 return (VALID_MMX_REG_MODE (mode)
25753 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on: general-purpose registers.  */
25756 if (mode == QImode)
25758 /* Take care for QImode values - they can be in non-QI regs,
25759 but then they do cause partial register stalls. */
25760 if (regno < 4 || TARGET_64BIT)
/* If partial-reg stalls are not a concern, any GPR will do; late in
 compilation (during/after reload) allow it regardless.  */
25762 if (!TARGET_PARTIAL_REG_STALL)
25764 return reload_in_progress || reload_completed;
25766 /* We handle both integer and floats in the general purpose registers. */
25767 else if (VALID_INT_MODE_P (mode))
25769 else if (VALID_FP_MODE_P (mode))
25771 else if (VALID_DFP_MODE_P (mode))
25773 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25774 on to use that value in smaller contexts, this can easily force a
25775 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25776 supporting DImode, allow it. */
25777 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25783 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25784 tieable integer mode. */
/* NOTE(review): the switch head and case labels are elided -- the
 visible returns presumably correspond to the HI/QI and SI/DI cases.  */
25787 ix86_tieable_integer_mode_p (enum machine_mode mode)
25796 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25799 return TARGET_64BIT;
25806 /* Return true if MODE1 is accessible in a register that can hold MODE2
25807 without copying. That is, all register classes that can hold MODE2
25808 can also hold MODE1. */
25811 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25813 if (mode1 == mode2)
25816 if (ix86_tieable_integer_mode_p (mode1)
25817 && ix86_tieable_integer_mode_p (mode2))
25820 /* MODE2 being XFmode implies fp stack or general regs, which means we
25821 can tie any smaller floating point modes to it. Note that we do not
25822 tie this with TFmode. */
25823 if (mode2 == XFmode)
25824 return mode1 == SFmode || mode1 == DFmode;
25826 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25827 that we can tie it with SFmode. */
25828 if (mode2 == DFmode)
25829 return mode1 == SFmode;
25831 /* If MODE2 is only appropriate for an SSE register, then tie with
25832 any other mode acceptable to SSE registers. */
25833 if (GET_MODE_SIZE (mode2) == 16
25834 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25835 return (GET_MODE_SIZE (mode1) == 16
25836 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25838 /* If MODE2 is appropriate for an MMX register, then tie
25839 with any other mode acceptable to MMX registers. */
25840 if (GET_MODE_SIZE (mode2) == 8
25841 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25842 return (GET_MODE_SIZE (mode1) == 8
25843 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25848 /* Compute a (partial) cost for rtx X. Return true if the complete
25849 cost has been computed, and false if subexpressions should be
25850 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): heavily elided listing -- the top-level switch on CODE,
 most case labels, break/return statements and several assignments are
 missing between the lines shown.  Comments below are keyed to the
 visible fragments only.  */
25853 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25855 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25856 enum machine_mode mode = GET_MODE (x);
/* When not optimizing for speed, cost by size instead.  */
25857 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* CONST_INT/SYMBOL_REF and friends: immediates that don't fit the
 64-bit encodings cost extra.  */
25865 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25867 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25869 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): BUG -- `!GET_CODE (x) != LABEL_REF' negates the enum
 value before comparing, making the subterm nearly always true; this
 almost certainly meant `GET_CODE (x) != LABEL_REF' (later GCC releases
 fixed exactly this).  Cannot be fixed here: adjacent lines are elided.  */
25871 || (!GET_CODE (x) != LABEL_REF
25872 && (GET_CODE (x) != SYMBOL_REF
25873 || !SYMBOL_REF_LOCAL_P (x)))))
/* CONST_DOUBLE: free if an x87 standard constant (fldz/fld1...).  */
25880 if (mode == VOIDmode)
25883 switch (standard_80387_constant_p (x))
25888 default: /* Other constants */
25893 /* Start with (MEM (SYMBOL_REF)), since that's where
25894 it'll probably end up. Add a penalty for size. */
25895 *total = (COSTS_N_INSNS (1)
25896 + (flag_pic != 0 && !TARGET_64BIT)
25897 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25903 /* The zero extensions is often completely free on x86_64, so make
25904 it as cheap as possible. */
25905 if (TARGET_64BIT && mode == DImode
25906 && GET_MODE (XEXP (x, 0)) == SImode)
25908 else if (TARGET_ZERO_EXTEND_WITH_AND)
25909 *total = cost->add;
25911 *total = cost->movzx;
25915 *total = cost->movsx;
/* ASHIFT by a constant: small left shifts may be done with lea/add.  */
25919 if (CONST_INT_P (XEXP (x, 1))
25920 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25922 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25925 *total = cost->add;
25928 if ((value == 2 || value == 3)
25929 && cost->lea <= cost->shift_const)
25931 *total = cost->lea;
/* Other shift/rotate codes: DImode on 32-bit is a two-insn sequence.  */
25941 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25943 if (CONST_INT_P (XEXP (x, 1)))
25945 if (INTVAL (XEXP (x, 1)) > 32)
25946 *total = cost->shift_const + COSTS_N_INSNS (2);
25948 *total = cost->shift_const * 2;
25952 if (GET_CODE (XEXP (x, 1)) == AND)
25953 *total = cost->shift_var * 2;
25955 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25960 if (CONST_INT_P (XEXP (x, 1)))
25961 *total = cost->shift_const;
25963 *total = cost->shift_var;
/* MULT: FP multiplies by unit; integer multiplies modeled per set bit
 of a constant multiplier plus a widening-multiply adjustment.  */
25968 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25970 /* ??? SSE scalar cost should be used here. */
25971 *total = cost->fmul;
25974 else if (X87_FLOAT_MODE_P (mode))
25976 *total = cost->fmul;
25979 else if (FLOAT_MODE_P (mode))
25981 /* ??? SSE vector cost should be used here. */
25982 *total = cost->fmul;
25987 rtx op0 = XEXP (x, 0);
25988 rtx op1 = XEXP (x, 1);
25990 if (CONST_INT_P (XEXP (x, 1)))
25992 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Population count of the multiplier (Kernighan's trick).  */
25993 for (nbits = 0; value != 0; value &= value - 1)
25997 /* This is arbitrary. */
26000 /* Compute costs correctly for widening multiplication. */
26001 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26002 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26003 == GET_MODE_SIZE (mode))
26005 int is_mulwiden = 0;
26006 enum machine_mode inner_mode = GET_MODE (op0);
26008 if (GET_CODE (op0) == GET_CODE (op1))
26009 is_mulwiden = 1, op1 = XEXP (op1, 0);
26010 else if (CONST_INT_P (op1))
26012 if (GET_CODE (op0) == SIGN_EXTEND)
26013 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26016 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26020 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26023 *total = (cost->mult_init[MODE_INDEX (mode)]
26024 + nbits * cost->mult_bit
26025 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/MOD family.  */
26034 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26035 /* ??? SSE cost should be used here. */
26036 *total = cost->fdiv;
26037 else if (X87_FLOAT_MODE_P (mode))
26038 *total = cost->fdiv;
26039 else if (FLOAT_MODE_P (mode))
26040 /* ??? SSE vector cost should be used here. */
26041 *total = cost->fdiv;
26043 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-shaped address arithmetic
 (reg*{2,4,8} + reg + const) and charge a single lea.  */
26047 if (GET_MODE_CLASS (mode) == MODE_INT
26048 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26050 if (GET_CODE (XEXP (x, 0)) == PLUS
26051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26052 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26053 && CONSTANT_P (XEXP (x, 1)))
26055 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26056 if (val == 2 || val == 4 || val == 8)
26058 *total = cost->lea;
26059 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26060 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26061 outer_code, speed);
26062 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26066 else if (GET_CODE (XEXP (x, 0)) == MULT
26067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26069 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26070 if (val == 2 || val == 4 || val == 8)
26072 *total = cost->lea;
26073 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26074 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26078 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26080 *total = cost->lea;
26081 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26082 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26083 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* FP add/sub.  */
26090 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26092 /* ??? SSE cost should be used here. */
26093 *total = cost->fadd;
26096 else if (X87_FLOAT_MODE_P (mode))
26098 *total = cost->fadd;
26101 else if (FLOAT_MODE_P (mode))
26103 /* ??? SSE vector cost should be used here. */
26104 *total = cost->fadd;
/* Logical ops: DImode on 32-bit is two insns; halves that are already
 DImode are counted double via the shift trick below.  */
26112 if (!TARGET_64BIT && mode == DImode)
26114 *total = (cost->add * 2
26115 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26116 << (GET_MODE (XEXP (x, 0)) != DImode))
26117 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26118 << (GET_MODE (XEXP (x, 1)) != DImode)))
/* NEG: FP negation is a sign-flip.  */
26124 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26126 /* ??? SSE cost should be used here. */
26127 *total = cost->fchs;
26130 else if (X87_FLOAT_MODE_P (mode))
26132 *total = cost->fchs;
26135 else if (FLOAT_MODE_P (mode))
26137 /* ??? SSE vector cost should be used here. */
26138 *total = cost->fchs;
/* NOT: one insn, two on 32-bit DImode.  */
26144 if (!TARGET_64BIT && mode == DImode)
26145 *total = cost->add * 2;
26147 *total = cost->add;
/* COMPARE of a single extracted bit against zero -> test insn.  */
26151 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26152 && XEXP (XEXP (x, 0), 1) == const1_rtx
26153 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26154 && XEXP (x, 1) == const0_rtx)
26156 /* This kind of construct is implemented using test[bwl].
26157 Treat it as if we had an AND. */
26158 *total = (cost->add
26159 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26160 + rtx_cost (const1_rtx, outer_code, speed));
26166 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT cases.  */
26171 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26172 /* ??? SSE cost should be used here. */
26173 *total = cost->fabs;
26174 else if (X87_FLOAT_MODE_P (mode))
26175 *total = cost->fabs;
26176 else if (FLOAT_MODE_P (mode))
26177 /* ??? SSE vector cost should be used here. */
26178 *total = cost->fabs;
26182 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26183 /* ??? SSE cost should be used here. */
26184 *total = cost->fsqrt;
26185 else if (X87_FLOAT_MODE_P (mode))
26186 *total = cost->fsqrt;
26187 else if (FLOAT_MODE_P (mode))
26188 /* ??? SSE vector cost should be used here. */
26189 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reference is effectively free.  */
26193 if (XINT (x, 1) == UNSPEC_TP)
/* Counter for unique Mach-O lazy-binding stub labels.  */
26204 static int current_machopic_label_num;
26206 /* Given a symbol name and its associated stub, write out the
26207 definition of the stub. */
/* NOTE(review): elided listing -- the MACHOPIC_PURE/#if branches around
 the section switches and instruction sequences are missing, so the
 pairing of the alternative fprintf sequences cannot be verified here.  */
26210 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26212 unsigned int length;
26213 char *binder_name, *symbol_name, lazy_ptr_name[32];
26214 int label = ++current_machopic_label_num;
26216 /* For 64-bit we shouldn't get here. */
26217 gcc_assert (!TARGET_64BIT);
26219 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26220 symb = (*targetm.strip_name_encoding) (symb);
26222 length = strlen (stub);
26223 binder_name = XALLOCAVEC (char, length + 32);
26224 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26226 length = strlen (symb);
26227 symbol_name = XALLOCAVEC (char, length + 32);
26228 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26230 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section (selection condition elided).  */
26233 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26235 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26237 fprintf (file, "%s:\n", stub);
26238 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax, load the lazy pointer, jump.  */
26242 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26243 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26244 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC stub: jump through the lazy pointer directly.  */
26247 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder helper: push the lazy-pointer address and enter dyld.  */
26249 fprintf (file, "%s:\n", binder_name);
26253 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26254 fprintf (file, "\tpushl\t%%eax\n");
26257 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26259 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder so the first call
 resolves the symbol.  */
26261 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26262 fprintf (file, "%s:\n", lazy_ptr_name);
26263 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26264 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86; delegates to the generic
 Darwin end-of-file handling (additional lines elided in this view).  */
26268 darwin_x86_file_end (void)
26270 darwin_file_end ();
26273 #endif /* TARGET_MACHO */
26275 /* Order the registers for register allocator. */
/* Preference order: call-clobbered GPRs, call-saved GPRs, then x87
 before SSE when doing x87 math (and after SSE otherwise), then MMX.  */
26278 x86_order_regs_for_local_alloc (void)
26283 /* First allocate the local general purpose registers. */
26284 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26285 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26286 reg_alloc_order [pos++] = i;
26288 /* Global general purpose registers. */
26289 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26290 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26291 reg_alloc_order [pos++] = i;
26293 /* x87 registers come first in case we are doing FP math
26295 if (!TARGET_SSE_MATH)
26296 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26297 reg_alloc_order [pos++] = i;
26299 /* SSE registers. */
26300 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26301 reg_alloc_order [pos++] = i;
26302 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26303 reg_alloc_order [pos++] = i;
26305 /* x87 registers. */
26306 if (TARGET_SSE_MATH)
26307 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26308 reg_alloc_order [pos++] = i;
26310 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26311 reg_alloc_order [pos++] = i;
26313 /* Initialize the rest of array as we do not allocate some registers
26315 while (pos < FIRST_PSEUDO_REGISTER)
26316 reg_alloc_order [pos++] = 0;
26319 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26320 struct attribute_spec.handler. */
26322 ix86_handle_abi_attribute (tree *node, tree name,
26323 tree args ATTRIBUTE_UNUSED,
26324 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only function-ish declarations may carry an ABI attribute.  */
26326 if (TREE_CODE (*node) != FUNCTION_TYPE
26327 && TREE_CODE (*node) != METHOD_TYPE
26328 && TREE_CODE (*node) != FIELD_DECL
26329 && TREE_CODE (*node) != TYPE_DECL)
26331 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26332 IDENTIFIER_POINTER (name));
26333 *no_add_attrs = true;
/* The !TARGET_64BIT guard for this diagnostic is elided in this view.  */
26338 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26339 IDENTIFIER_POINTER (name));
26340 *no_add_attrs = true;
26344 /* Can combine regparm with all attributes but fastcall. */
/* ms_abi and sysv_abi are mutually exclusive, checked in both orders.  */
26345 if (is_attribute_p ("ms_abi", name))
26347 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26349 error ("ms_abi and sysv_abi attributes are not compatible");
26354 else if (is_attribute_p ("sysv_abi", name))
26356 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26358 error ("ms_abi and sysv_abi attributes are not compatible");
26367 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26368 struct attribute_spec.handler. */
/* The attribute is only meaningful on RECORD_TYPE/UNION_TYPE (possibly
   reached through a TYPE_DECL), and ms_struct/gcc_struct are mutually
   exclusive; anything else gets a warning and *no_add_attrs = true.  */
26370 ix86_handle_struct_attribute (tree *node, tree name,
26371 tree args ATTRIBUTE_UNUSED,
26372 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26375 if (DECL_P (*node))
26377 if (TREE_CODE (*node) == TYPE_DECL)
/* For a TYPE_DECL, inspect the declared type rather than the decl.  */
26378 type = &TREE_TYPE (*node);
26383 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26384 || TREE_CODE (*type) == UNION_TYPE)))
26386 warning (OPT_Wattributes, "%qs attribute ignored",
26387 IDENTIFIER_POINTER (name));
26388 *no_add_attrs = true;
26391 else if ((is_attribute_p ("ms_struct", name)
26392 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26393 || ((is_attribute_p ("gcc_struct", name)
26394 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26396 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26397 IDENTIFIER_POINTER (name));
26398 *no_add_attrs = true;
/* Return true iff RECORD_TYPE should use Microsoft bitfield layout:
   either MS layout is the target default (TARGET_MS_BITFIELD_LAYOUT)
   and the type does not opt out via gcc_struct, or the type explicitly
   requests it via ms_struct.  */
26405 ix86_ms_bitfield_layout_p (const_tree record_type)
26407 return (TARGET_MS_BITFIELD_LAYOUT &&
26408 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26409 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26412 /* Returns an expression indicating where the this parameter is
26413 located on entry to the FUNCTION. */
/* 64-bit: `this' arrives in the first integer parameter register of
   the function's ABI -- or the second when the return value is an
   aggregate passed through a hidden pointer (AGGR indexes past it).
   32-bit: a register when regparm/fastcall place it there, otherwise a
   stack slot just above the return address.  */
26416 x86_this_parameter (tree function)
26418 tree type = TREE_TYPE (function);
26419 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26424 const int *parm_regs;
26426 if (ix86_function_type_abi (type) == MS_ABI)
26427 parm_regs = x86_64_ms_abi_int_parameter_registers;
26429 parm_regs = x86_64_int_parameter_registers;
/* aggr == 1 skips the hidden aggregate-return pointer register.  */
26430 return gen_rtx_REG (DImode, parm_regs[aggr]);
26433 nregs = ix86_function_regparm (type, function);
26435 if (nregs > 0 && !stdarg_p (type))
26439 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
/* fastcall: ECX holds `this' (EDX when a hidden return ptr is first).  */
26440 regno = aggr ? DX_REG : CX_REG;
26448 return gen_rtx_MEM (SImode,
26449 plus_constant (stack_pointer_rtx, 4));
26452 return gen_rtx_REG (SImode, regno);
/* Default stack case: 4(%esp), or 8(%esp) past a hidden return ptr.  */
26455 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26458 /* Determine whether x86_output_mi_thunk can succeed. */
/* 64-bit always works; 32-bit needs a scratch register free, which
   fails when all three regparm registers are taken and we also need a
   temporary for VCALL_OFFSET or for PIC GOT references.  */
26461 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26462 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26463 HOST_WIDE_INT vcall_offset, const_tree function)
26465 /* 64-bit can handle anything. */
26469 /* For 32-bit, everything's fine if we have one free register. */
26470 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26473 /* Need a free register for vcall_offset. */
26477 /* Need a free register for GOT references. */
26478 if (flag_pic && !(*targetm.binds_local_p) (function))
26481 /* Otherwise ok. */
26485 /* Output the assembler code for a thunk function. THUNK_DECL is the
26486 declaration for the thunk function itself, FUNCTION is the decl for
26487 the target function. DELTA is an immediate constant offset to be
26488 added to THIS. If VCALL_OFFSET is nonzero, the word at
26489 *(*this + vcall_offset) should be added to THIS. */
/* Emits raw assembly via output_asm_insn: adjust `this' (constant
   DELTA, then optional vtable-based VCALL_OFFSET), spill it back to its
   stack slot if it lived there, then tail-jump to FUNCTION -- directly,
   via GOTPCREL, via a Mach-O stub, or via a GOT load, depending on PIC
   and target.  */
26492 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26493 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26494 HOST_WIDE_INT vcall_offset, tree function)
26497 rtx this_param = x86_this_parameter (function);
26500 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26501 pull it in now and let DELTA benefit. */
26502 if (REG_P (this_param))
26503 this_reg = this_param;
26504 else if (vcall_offset)
26506 /* Put the this parameter into %eax. */
26507 xops[0] = this_param;
26508 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26509 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26512 this_reg = NULL_RTX;
26514 /* Adjust the this parameter by a fixed constant. */
26517 xops[0] = GEN_INT (delta);
26518 xops[1] = this_reg ? this_reg : this_param;
/* DELTA too wide for an add immediate: stage it through r10 first.  */
26521 if (!x86_64_general_operand (xops[0], DImode))
26523 tmp = gen_rtx_REG (DImode, R10_REG);
26525 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26527 xops[1] = this_param;
26529 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26532 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26535 /* Adjust the this parameter by a value stored in the vtable. */
26539 tmp = gen_rtx_REG (DImode, R10_REG);
26542 int tmp_regno = CX_REG;
/* fastcall owns %ecx/%edx for args, so use %eax as the scratch.  */
26543 if (lookup_attribute ("fastcall",
26544 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26545 tmp_regno = AX_REG;
26546 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
26549 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26551 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26553 /* Adjust the this parameter. */
26554 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* VCALL_OFFSET too big for a displacement: materialize in r11.  */
26555 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26557 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26558 xops[0] = GEN_INT (vcall_offset);
26560 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26561 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26563 xops[1] = this_reg;
26564 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26567 /* If necessary, drop THIS back to its stack slot. */
26568 if (this_reg && this_reg != this_param)
26570 xops[0] = this_reg;
26571 xops[1] = this_param;
26572 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26575 xops[0] = XEXP (DECL_RTL (function), 0);
26578 if (!flag_pic || (*targetm.binds_local_p) (function))
26579 output_asm_insn ("jmp\t%P0", xops);
26580 /* All thunks should be in the same object as their target,
26581 and thus binds_local_p should be true. */
26582 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26583 gcc_unreachable ();
/* 64-bit PIC: indirect jump through the GOTPCREL slot.  */
26586 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26587 tmp = gen_rtx_CONST (Pmode, tmp);
26588 tmp = gen_rtx_MEM (QImode, tmp);
26590 output_asm_insn ("jmp\t%A0", xops);
26595 if (!flag_pic || (*targetm.binds_local_p) (function))
26596 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump via the machopic indirection stub.  */
26601 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26602 tmp = (gen_rtx_SYMBOL_REF
26604 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26605 tmp = gen_rtx_MEM (QImode, tmp);
26607 output_asm_insn ("jmp\t%0", xops);
26610 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT pointer in %ecx, load the
   function address from the GOT, and jump through it.  */
26612 tmp = gen_rtx_REG (SImode, CX_REG);
26613 output_set_got (tmp, NULL_RTX);
26616 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26617 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the standard file prologue plus any
   x86-specific directives (.version, __fltused global, Intel syntax
   selector).  Darwin gets its own file-start handling.  */
26623 x86_file_start (void)
26625 default_file_start ();
26627 darwin_file_start ();
26629 if (X86_FILE_START_VERSION_DIRECTIVE)
26630 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26631 if (X86_FILE_START_FLTUSED)
26632 fputs ("\t.global\t__fltused\n", asm_out_file);
26633 if (ix86_asm_dialect == ASM_INTEL)
26634 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of FIELD at 32 bits for
   double/integer-class fields on 32-bit targets without -malign-double
   (the traditional i386 struct layout).  COMPUTED is the alignment the
   middle end derived.
   NOTE(review): the early-return for TARGET_64BIT/TARGET_ALIGN_DOUBLE
   and the final fallthrough return are outside this excerpt --
   presumably both return COMPUTED unchanged; confirm upstream.  */
26638 x86_field_alignment (tree field, int computed)
26640 enum machine_mode mode;
26641 tree type = TREE_TYPE (field);
26643 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26645 mode = TYPE_MODE (strip_array_types (type));
26646 if (mode == DFmode || mode == DCmode
26647 || GET_MODE_CLASS (mode) == MODE_INT
26648 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26649 return MIN (32, computed);
26653 /* Output assembler code to FILE to increment profiler label # LABELNO
26654 for profiling a function entry. */
/* Emits the mcount call sequence: 64-bit uses %r11 for the counter
   address and a GOTPCREL indirect call under SysV PIC; 32-bit PIC goes
   through %ebx/@GOTOFF and @GOT; non-PIC 32-bit calls mcount directly.
   Counter emission is compiled out when NO_PROFILE_COUNTERS is set.  */
26656 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26660 #ifndef NO_PROFILE_COUNTERS
26661 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26664 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26665 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26667 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26671 #ifndef NO_PROFILE_COUNTERS
26672 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26673 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26675 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26679 #ifndef NO_PROFILE_COUNTERS
26680 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26681 PROFILE_COUNT_REGISTER);
26683 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26687 /* We don't have exact information about the insn sizes, but we may assume
26688 quite safely that we are informed about all 1 byte insns and memory
26689 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound (in bytes) on the encoded size of
   INSN, used by the 16-byte-window jump padding pass below.  Alignment
   unspecs and jump tables count as zero; the actual returned constants
   for each case are on lines outside this excerpt.  */
26693 min_insn_size (rtx insn)
26697 if (!INSN_P (insn) || !active_insn_p (insn))
26700 /* Discard alignments we've emit and jump instructions. */
26701 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26702 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26705 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26706 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26709 /* Important case - calls are always 5 bytes.
26710 It is common to have many calls in the row. */
26712 && symbolic_reference_mentioned_p (PATTERN (insn))
26713 && !SIBLING_CALL_P (insn))
26715 if (get_attr_length (insn) <= 1)
26718 /* For normal instructions we may rely on the sizes of addresses
26719 and the presence of symbol to require 4 bytes of encoding.
26720 This is not the case for jumps where references are PC relative. */
26721 if (!JUMP_P (insn))
26723 l = get_attr_length_address (insn);
26724 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* A symbolic operand implies at least a 4-byte address encoding.  */
26733 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* ... window; walk the insn stream keeping a sliding window [START,
   INSN] of estimated byte sizes and insert a p2align before the 4th
   jump whenever the window could fit in one 16-byte line.  */
26737 ix86_avoid_jump_misspredicts (void)
26739 rtx insn, start = get_insns ();
26740 int nbytes = 0, njumps = 0;
26743 /* Look for all minimal intervals of instructions containing 4 jumps.
26744 The intervals are bounded by START and INSN. NBYTES is the total
26745 size of instructions in the interval including INSN and not including
26746 START. When the NBYTES is smaller than 16 bytes, it is possible
26747 that the end of START and INSN ends up in the same 16byte page.
26749 The smallest offset in the page INSN can start is the case where START
26750 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26751 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26753 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26756 nbytes += min_insn_size (insn);
26758 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26759 INSN_UID (insn), min_insn_size (insn));
/* Real jumps only -- jump tables (ADDR_VEC) don't count.  */
26761 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26762 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until it holds at most 3 jumps.  */
26770 start = NEXT_INSN (start);
26771 if ((JUMP_P (start)
26772 && GET_CODE (PATTERN (start)) != ADDR_VEC
26773 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26775 njumps--, isjump = 1;
26778 nbytes -= min_insn_size (start);
26780 gcc_assert (njumps >= 0);
26782 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26783 INSN_UID (start), INSN_UID (insn), nbytes);
26785 if (njumps == 3 && isjump && nbytes < 16)
26787 int padsize = 15 - nbytes + min_insn_size (insn);
26790 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26791 INSN_UID (insn), padsize);
26792 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26797 /* AMD Athlon works faster
26798 when RET is not destination of conditional jump or directly preceded
26799 by other jump instruction. We avoid the penalty by inserting NOP just
26800 before the RET instructions in such cases. */
/* Walk every predecessor edge of the exit block, find RETURN jumps not
   being optimized for size, and when the return is a jump target or
   immediately follows a (conditional) jump, replace it with the long
   "rep ret" form via gen_return_internal_long.  */
26802 ix86_pad_returns (void)
26807 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26809 basic_block bb = e->src;
26810 rtx ret = BB_END (bb);
26812 bool replace = false;
26814 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26815 || optimize_bb_for_size_p (bb))
26817 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26818 if (active_insn_p (prev) || LABEL_P (prev))
26820 if (prev && LABEL_P (prev))
/* The ret is a jump target: any non-fallthru predecessor edge means a
   jump lands directly on it.  */
26825 FOR_EACH_EDGE (e, ei, bb->preds)
26826 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26827 && !(e->flags & EDGE_FALLTHRU))
26832 prev = prev_active_insn (ret);
26834 && ((JUMP_P (prev) && any_condjump_p (prev))
26837 /* Empty functions get branch mispredict even when the jump destination
26838 is not visible to us. */
26839 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26844 emit_insn_before (gen_return_internal_long (), ret);
26850 /* Implement machine specific optimizations. We implement padding of returns
26851 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line is not visible in this
   excerpt -- this is the body of the machine-dependent reorg pass
   (presumably `static void ix86_reorg (void)'; confirm upstream).
   Both subpasses run only when optimizing for speed.  */
26855 if (TARGET_PAD_RETURNS && optimize
26856 && optimize_function_for_speed_p (cfun))
26857 ix86_pad_returns ();
26858 if (TARGET_FOUR_JUMP_LIMIT && optimize
26859 && optimize_function_for_speed_p (cfun))
26860 ix86_avoid_jump_misspredicts ();
26863 /* Return nonzero when QImode register that must be represented via REX prefix
/* ... is mentioned: extract INSN's operands and look for a hard
   register with number >= 4 (SPL/BPL/SIL/DIL and above need REX when
   accessed as a byte register).  */
26866 x86_extended_QIreg_mentioned_p (rtx insn)
26869 extract_insn_cached (insn);
26870 for (i = 0; i < recog_data.n_operands; i++)
26871 if (REG_P (recog_data.operand[i])
26872 && REGNO (recog_data.operand[i]) >= 4)
26877 /* Return nonzero when P points to register encoded via REX prefix.
26878 Called via for_each_rtx. */
/* Subroutine of x86_extended_reg_mentioned_p: true for r8-r15 and
   xmm8-xmm15 hard registers.  */
26880 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26882 unsigned int regno;
26885 regno = REGNO (*p);
26886 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26889 /* Return true when INSN mentions register that must be encoded using REX
/* ... prefix: walk the pattern (or the bare rtx, if INSN is not an
   insn) with extended_reg_mentioned_1.  */
26892 x86_extended_reg_mentioned_p (rtx insn)
26894 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26895 extended_reg_mentioned_1, NULL);
26898 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26899 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] = FP destination, operands[1] = unsigned integer source.
   Non-negative inputs convert directly; negative (i.e. high-bit-set)
   inputs are halved with the low bit folded in (to keep rounding
   correct), converted, then doubled.  */
26902 x86_emit_floatuns (rtx operands[2])
26904 rtx neglab, donelab, i0, i1, f0, in, out;
26905 enum machine_mode mode, inmode;
26907 inmode = GET_MODE (operands[1]);
26908 gcc_assert (inmode == SImode || inmode == DImode);
26911 in = force_reg (inmode, operands[1]);
26912 mode = GET_MODE (out);
26913 neglab = gen_label_rtx ();
26914 donelab = gen_label_rtx ();
26915 f0 = gen_reg_rtx (mode);
/* Branch to neglab when the input is "negative" as a signed value,
   i.e. its top bit is set and the plain signed convert would be wrong.  */
26917 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26919 expand_float (out, in, 0);
26921 emit_jump_insn (gen_jump (donelab));
26924 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1); convert; then out = f0 + f0.  */
26926 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26928 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26930 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26932 expand_float (f0, i0, 0);
26934 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26936 emit_label (donelab);
26939 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26940 with all elements equal to VAR. Return true if successful. */
/* Strategy depends on MODE (the surrounding switch cases are partly
   outside this excerpt): direct VEC_DUPLICATE where a pattern exists;
   insert-then-shuffle sequences for V8HI/V16QI using SSE2 punpckl +
   pshufd; widen-and-recurse for narrow elements; VEC_CONCAT of a
   duplicated half for 256-bit modes.  */
26943 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26944 rtx target, rtx val)
26946 enum machine_mode hmode, smode, wsmode, wvmode;
26961 val = force_reg (GET_MODE_INNER (mode), val);
26962 x = gen_rtx_VEC_DUPLICATE (mode, val);
26963 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26969 if (TARGET_SSE || TARGET_3DNOW_A)
26971 val = gen_lowpart (SImode, val);
26972 x = gen_rtx_TRUNCATE (HImode, val);
26973 x = gen_rtx_VEC_DUPLICATE (mode, x);
26974 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26996 /* Extend HImode to SImode using a paradoxical SUBREG. */
26997 tmp1 = gen_reg_rtx (SImode);
26998 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26999 /* Insert the SImode value as low element of V4SImode vector. */
27000 tmp2 = gen_reg_rtx (V4SImode);
27001 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27002 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27003 CONST0_RTX (V4SImode),
27005 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27006 /* Cast the V4SImode vector back to a V8HImode vector. */
27007 tmp1 = gen_reg_rtx (V8HImode);
27008 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27009 /* Duplicate the low short through the whole low SImode word. */
27010 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27011 /* Cast the V8HImode vector back to a V4SImode vector. */
27012 tmp2 = gen_reg_rtx (V4SImode);
27013 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27014 /* Replicate the low element of the V4SImode vector. */
27015 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27016 /* Cast the V2SImode back to V8HImode, and store in target. */
27017 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27028 /* Extend QImode to SImode using a paradoxical SUBREG. */
27029 tmp1 = gen_reg_rtx (SImode);
27030 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27031 /* Insert the SImode value as low element of V4SImode vector. */
27032 tmp2 = gen_reg_rtx (V4SImode);
27033 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27034 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27035 CONST0_RTX (V4SImode),
27037 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27038 /* Cast the V4SImode vector back to a V16QImode vector. */
27039 tmp1 = gen_reg_rtx (V16QImode);
27040 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27041 /* Duplicate the low byte through the whole low SImode word. */
27042 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27043 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27044 /* Cast the V16QImode vector back to a V4SImode vector. */
27045 tmp2 = gen_reg_rtx (V4SImode);
27046 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27047 /* Replicate the low element of the V4SImode vector. */
27048 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27049 /* Cast the V2SImode back to V16QImode, and store in target. */
27050 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27058 /* Replicate the value once into the next wider mode and recurse. */
27059 val = convert_modes (wsmode, smode, val, true);
27060 x = expand_simple_binop (wsmode, ASHIFT, val,
27061 GEN_INT (GET_MODE_BITSIZE (smode)),
27062 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27063 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27065 x = gen_reg_rtx (wvmode);
27066 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27067 gcc_unreachable ();
27068 emit_move_insn (target, gen_lowpart (mode, x));
/* 256-bit case: duplicate into a half-width vector, then concat it
   with itself.  */
27091 rtx tmp = gen_reg_rtx (hmode);
27092 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27093 emit_insn (gen_rtx_SET (VOIDmode, target,
27094 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27103 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27104 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* ... if successful.  Uses vector-set insns (SSE4.1/AVX) when
   available, a VEC_CONCAT with zero, or a duplicate+merge followed by
   a shuffle to move the element into position; narrow element modes
   zero-extend to SImode and recurse.  */
27108 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27109 rtx target, rtx var, int one_var)
27111 enum machine_mode vsimode;
27114 bool use_vector_set = false;
27119 /* For SSE4.1, we normally use vector set. But if the second
27120 element is zero and inter-unit moves are OK, we use movq
27122 use_vector_set = (TARGET_64BIT
27124 && !(TARGET_INTER_UNIT_MOVES
27130 use_vector_set = TARGET_SSE4_1;
27133 use_vector_set = TARGET_SSE2;
27136 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27143 use_vector_set = TARGET_AVX;
27146 /* Use ix86_expand_vector_set in 64bit mode only. */
27147 use_vector_set = TARGET_AVX && TARGET_64BIT;
27153 if (use_vector_set)
/* Zero the whole vector, then insert VAR at position ONE_VAR.  */
27155 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27156 var = force_reg (GET_MODE_INNER (mode), var);
27157 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27173 var = force_reg (GET_MODE_INNER (mode), var);
27174 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27175 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Need a hard-register-free pseudo as shuffle scratch.  */
27180 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27181 new_target = gen_reg_rtx (mode);
27183 new_target = target;
27184 var = force_reg (GET_MODE_INNER (mode), var);
27185 x = gen_rtx_VEC_DUPLICATE (mode, var);
27186 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27187 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27190 /* We need to shuffle the value to the correct position, so
27191 create a new pseudo to store the intermediate result. */
27193 /* With SSE2, we can use the integer shuffle insns. */
27194 if (mode != V4SFmode && TARGET_SSE2)
27196 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27198 GEN_INT (one_var == 1 ? 0 : 1),
27199 GEN_INT (one_var == 2 ? 0 : 1),
27200 GEN_INT (one_var == 3 ? 0 : 1)));
27201 if (target != new_target)
27202 emit_move_insn (target, new_target);
27206 /* Otherwise convert the intermediate result to V4SFmode and
27207 use the SSE1 shuffle instructions. */
27208 if (mode != V4SFmode)
27210 tmp = gen_reg_rtx (V4SFmode);
27211 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27216 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27218 GEN_INT (one_var == 1 ? 0 : 1),
27219 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27220 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27222 if (mode != V4SFmode)
27223 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27224 else if (tmp != target)
27225 emit_move_insn (target, tmp);
27227 else if (target != new_target)
27228 emit_move_insn (target, new_target);
27233 vsimode = V4SImode;
27239 vsimode = V2SImode;
27245 /* Zero extend the variable element to SImode and recurse. */
27246 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27248 x = gen_reg_rtx (vsimode);
27249 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27251 gcc_unreachable ();
27253 emit_move_insn (target, gen_lowpart (mode, x));
27261 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27262 consisting of the values in VALS. It is known that all elements
27263 except ONE_VAR are constants. Return true if successful. */
/* Builds a constant vector with the variable slot zeroed, loads it
   (typically from the pool), then overwrites slot ONE_VAR with VAR via
   ix86_expand_vector_set.  QImode vectors have no single-byte set, so
   the variable byte is fused with its neighbor into an HImode set.  */
27266 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27267 rtx target, rtx vals, int one_var)
27269 rtx var = XVECEXP (vals, 0, one_var);
27270 enum machine_mode wmode;
27273 const_vec = copy_rtx (vals);
27274 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27275 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27283 /* For the two element vectors, it's just as easy to use
27284 the general case. */
27288 /* Use ix86_expand_vector_set in 64bit mode only. */
27311 /* There's no way to set one QImode entry easily. Combine
27312 the variable value with its adjacent constant value, and
27313 promote to an HImode set. */
27314 x = XVECEXP (vals, 0, one_var ^ 1);
/* Even index: variable byte is the high half of the HImode pair.  */
27317 var = convert_modes (HImode, QImode, var, true);
27318 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27319 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27320 x = GEN_INT (INTVAL (x) & 0xff);
/* Odd index: neighbor constant occupies the high byte instead.  */
27324 var = convert_modes (HImode, QImode, var, true);
27325 x = gen_int_mode (INTVAL (x) << 8, HImode);
27327 if (x != const0_rtx)
27328 var = expand_simple_binop (HImode, IOR, var, x, var,
27329 1, OPTAB_LIB_WIDEN);
27331 x = gen_reg_rtx (wmode);
27332 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27333 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27335 emit_move_insn (target, gen_lowpart (mode, x));
/* General case: load the constant image, then set the one slot.  */
27342 emit_move_insn (target, const_vec);
27343 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27347 /* A subroutine of ix86_expand_vector_init_general. Use vector
27348 concatenate to handle the most general case: all values variable,
27349 and none identical. */
/* N == 2: one VEC_CONCAT of the two operands.  N == 4/8: pair up
   adjacent operands into half-width vectors (processed backward to
   help the register allocator, PR 36222), then recurse on the halves.
   The switch cases selecting cmode/hmode are mostly outside this
   excerpt.  */
27352 ix86_expand_vector_init_concat (enum machine_mode mode,
27353 rtx target, rtx *ops, int n)
27355 enum machine_mode cmode, hmode = VOIDmode;
27356 rtx first[8], second[4];
27396 gcc_unreachable ();
27399 if (!register_operand (ops[1], cmode))
27400 ops[1] = force_reg (cmode, ops[1]);
27401 if (!register_operand (ops[0], cmode))
27402 ops[0] = force_reg (cmode, ops[0]);
27403 emit_insn (gen_rtx_SET (VOIDmode, target,
27404 gen_rtx_VEC_CONCAT (mode, ops[0],
27424 gcc_unreachable ();
27440 gcc_unreachable ();
27445 /* FIXME: We process inputs backward to help RA. PR 36222. */
27448 for (; i > 0; i -= 2, j--)
27450 first[j] = gen_reg_rtx (cmode);
27451 v = gen_rtvec (2, ops[i - 1], ops[i]);
27452 ix86_expand_vector_init (false, first[j],
27453 gen_rtx_PARALLEL (cmode, v));
/* n > 4: combine the pairs once more into quarter-count vectors
   before the final concat recursion.  */
27459 gcc_assert (hmode != VOIDmode);
27460 for (i = j = 0; i < n; i += 2, j++)
27462 second[j] = gen_reg_rtx (hmode);
27463 ix86_expand_vector_init_concat (hmode, second [j],
27467 ix86_expand_vector_init_concat (mode, target, second, n);
27470 ix86_expand_vector_init_concat (mode, target, first, n);
27474 gcc_unreachable ();
27478 /* A subroutine of ix86_expand_vector_init_general. Use vector
27479 interleave to handle the most general case: all values variable,
27480 and none identical. */
/* Builds a V8HI or V16QI vector from 2*N scalar operands: each pair of
   elements is packed into one vector (odd element inserted via
   VEC_MERGE, even element via a vec_set), then successive rounds of
   punpckl-style interleaves in progressively wider integer modes merge
   the partial vectors into TARGET.  */
27483 ix86_expand_vector_init_interleave (enum machine_mode mode,
27484 rtx target, rtx *ops, int n)
27486 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27489 rtx (*gen_load_even) (rtx, rtx, rtx);
27490 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27491 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27496 gen_load_even = gen_vec_setv8hi;
27497 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27498 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27499 inner_mode = HImode;
27500 first_imode = V4SImode;
27501 second_imode = V2DImode;
27502 third_imode = VOIDmode;
27505 gen_load_even = gen_vec_setv16qi;
27506 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27507 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27508 inner_mode = QImode;
27509 first_imode = V8HImode;
27510 second_imode = V4SImode;
27511 third_imode = V2DImode;
27514 gcc_unreachable ();
27517 for (i = 0; i < n; i++)
27519 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
27520 op0 = gen_reg_rtx (SImode);
27521 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27523 /* Insert the SImode value as low element of V4SImode vector. */
27524 op1 = gen_reg_rtx (V4SImode);
27525 op0 = gen_rtx_VEC_MERGE (V4SImode,
27526 gen_rtx_VEC_DUPLICATE (V4SImode,
27528 CONST0_RTX (V4SImode),
27530 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27532 /* Cast the V4SImode vector back to a vector in orignal mode. */
27533 op0 = gen_reg_rtx (mode);
27534 emit_move_insn (op0, gen_lowpart (mode, op1));
27536 /* Load even elements into the second positon. */
27537 emit_insn ((*gen_load_even) (op0,
27538 force_reg (inner_mode,
27542 /* Cast vector to FIRST_IMODE vector. */
27543 ops[i] = gen_reg_rtx (first_imode);
27544 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27547 /* Interleave low FIRST_IMODE vectors. */
27548 for (i = j = 0; i < n; i += 2, j++)
27550 op0 = gen_reg_rtx (first_imode);
27551 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27553 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27554 ops[j] = gen_reg_rtx (second_imode);
27555 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27558 /* Interleave low SECOND_IMODE vectors. */
27559 switch (second_imode)
27562 for (i = j = 0; i < n / 2; i += 2, j++)
27564 op0 = gen_reg_rtx (second_imode);
27565 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27568 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27570 ops[j] = gen_reg_rtx (third_imode);
27571 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* V4SI round done: one more V2DI interleave finishes the job.  */
27573 second_imode = V2DImode;
27574 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27578 op0 = gen_reg_rtx (second_imode);
27579 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27582 /* Cast the SECOND_IMODE vector back to a vector on original
27584 emit_insn (gen_rtx_SET (VOIDmode, target,
27585 gen_lowpart (mode, op0)));
27589 gcc_unreachable ();
27593 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27594 all values variable, and none identical. */
/* Dispatches per mode (the switch labels are mostly outside this
   excerpt): concat-based build, interleave-based build for 256-bit and
   SSE4.1 V8HI/V16QI, or -- in the word-at-a-time fallback at the end --
   packing elements into word_mode integers with shift/or and moving the
   words into the vector register.  */
27597 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27598 rtx target, rtx vals)
27600 rtx ops[32], op0, op1;
27601 enum machine_mode half_mode = VOIDmode;
27608 if (!mmx_ok && !TARGET_SSE)
27620 n = GET_MODE_NUNITS (mode);
27621 for (i = 0; i < n; i++)
27622 ops[i] = XVECEXP (vals, 0, i);
27623 ix86_expand_vector_init_concat (mode, target, ops, n);
27627 half_mode = V16QImode;
27631 half_mode = V8HImode;
/* 256-bit: interleave each 128-bit half, then concat them.  */
27635 n = GET_MODE_NUNITS (mode);
27636 for (i = 0; i < n; i++)
27637 ops[i] = XVECEXP (vals, 0, i);
27638 op0 = gen_reg_rtx (half_mode);
27639 op1 = gen_reg_rtx (half_mode);
27640 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27642 ix86_expand_vector_init_interleave (half_mode, op1,
27643 &ops [n >> 1], n >> 2);
27644 emit_insn (gen_rtx_SET (VOIDmode, target,
27645 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27649 if (!TARGET_SSE4_1)
27657 /* Don't use ix86_expand_vector_init_interleave if we can't
27658 move from GPR to SSE register directly. */
27659 if (!TARGET_INTER_UNIT_MOVES)
27662 n = GET_MODE_NUNITS (mode);
27663 for (i = 0; i < n; i++)
27664 ops[i] = XVECEXP (vals, 0, i);
27665 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27673 gcc_unreachable ();
/* Fallback: pack the elements into full machine words with
   shift-and-or, then assemble the vector from those words.  */
27677 int i, j, n_elts, n_words, n_elt_per_word;
27678 enum machine_mode inner_mode;
27679 rtx words[4], shift;
27681 inner_mode = GET_MODE_INNER (mode);
27682 n_elts = GET_MODE_NUNITS (mode);
27683 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27684 n_elt_per_word = n_elts / n_words;
27685 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27687 for (i = 0; i < n_words; ++i)
27689 rtx word = NULL_RTX;
27691 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are consumed high-to-low within each word so the shifts
   accumulate them in memory order.  */
27693 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27694 elt = convert_modes (word_mode, inner_mode, elt, true);
27700 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27701 word, 1, OPTAB_LIB_WIDEN);
27702 word = expand_simple_binop (word_mode, IOR, word, elt,
27703 word, 1, OPTAB_LIB_WIDEN);
27711 emit_move_insn (target, gen_lowpart (mode, words[0]));
27712 else if (n_words == 2)
/* The clobber tells the RA we build TMP piecewise from both halves.  */
27714 rtx tmp = gen_reg_rtx (mode);
27715 emit_clobber (tmp);
27716 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27717 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27718 emit_move_insn (target, tmp);
27720 else if (n_words == 4)
27722 rtx tmp = gen_reg_rtx (V4SImode);
27723 gcc_assert (word_mode == SImode);
27724 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27725 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27726 emit_move_insn (target, gen_lowpart (mode, tmp));
27729 gcc_unreachable ();
27733 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27734 instructions unless MMX_OK is true. */
/* Top-level dispatcher: classify VALS (how many non-constant elements,
   whether all are identical / all zero), then pick the cheapest
   expansion -- constant-pool load, broadcast, load-and-patch for a
   single variable element, or the fully general builder.  */
27737 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27739 enum machine_mode mode = GET_MODE (target);
27740 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27741 int n_elts = GET_MODE_NUNITS (mode);
27742 int n_var = 0, one_var = -1;
27743 bool all_same = true, all_const_zero = true;
27747 for (i = 0; i < n_elts; ++i)
27749 x = XVECEXP (vals, 0, i);
27750 if (!(CONST_INT_P (x)
27751 || GET_CODE (x) == CONST_DOUBLE
27752 || GET_CODE (x) == CONST_FIXED))
27753 n_var++, one_var = i;
27754 else if (x != CONST0_RTX (inner_mode))
27755 all_const_zero = false;
27756 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27760 /* Constants are best loaded from the constant pool. */
27763 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27767 /* If all values are identical, broadcast the value. */
27769 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27770 XVECEXP (vals, 0, 0)))
27773 /* Values where only one field is non-constant are best loaded from
27774 the pool and overwritten via move later. */
27778 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27779 XVECEXP (vals, 0, one_var),
27783 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Everything else falls through to the general expander.  */
27787 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector register TARGET.
   MMX instructions are avoided unless MMX_OK.  Strategy is chosen per
   vector mode: SSE4.1 insert/blend (via the vec_merge path at the
   bottom), mode-specific shuffle sequences, recursion into a 128-bit
   half for 256-bit AVX modes, or a stack-memory round trip as the
   last resort.  */
27791 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27793 enum machine_mode mode = GET_MODE (target);
27794 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27795 enum machine_mode half_mode;
27796 bool use_vec_merge = false;
/* Extract-lo/hi generators for the six 256-bit AVX modes, indexed by
   mode (j) and half (i).  */
27798 static rtx (*gen_extract[6][2]) (rtx, rtx)
27800 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27801 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27802 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27803 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27804 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27805 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
/* Matching insert-lo/hi generators used to write the half back.  */
27807 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27809 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27810 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27811 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27812 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27813 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27814 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: rebuild the vector as a VEC_CONCAT of VAL and the
   other (extracted) element, ordered according to ELT.  */
27824 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27825 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27827 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27829 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27830 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27836 use_vec_merge = TARGET_SSE4_1;
27844 /* For the two element vectors, we implement a VEC_CONCAT with
27845 the extraction of the other element. */
27847 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27848 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27851 op0 = val, op1 = tmp;
27853 op0 = tmp, op1 = val;
27855 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27856 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27861 use_vec_merge = TARGET_SSE4_1;
27868 use_vec_merge = true;
/* V4SF without SSE4.1: use unpcklps/shufps sequences that differ per
   target element; each recursion to element 0 uses movss semantics.  */
27872 /* tmp = target = A B C D */
27873 tmp = copy_to_reg (target);
27874 /* target = A A B B */
27875 emit_insn (gen_sse_unpcklps (target, target, target));
27876 /* target = X A B B */
27877 ix86_expand_vector_set (false, target, val, 0);
27878 /* target = A X C D */
27879 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27880 GEN_INT (1), GEN_INT (0),
27881 GEN_INT (2+4), GEN_INT (3+4)));
27885 /* tmp = target = A B C D */
27886 tmp = copy_to_reg (target);
27887 /* tmp = X B C D */
27888 ix86_expand_vector_set (false, tmp, val, 0);
27889 /* target = A B X D */
27890 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27891 GEN_INT (0), GEN_INT (1),
27892 GEN_INT (0+4), GEN_INT (3+4)));
27896 /* tmp = target = A B C D */
27897 tmp = copy_to_reg (target);
27898 /* tmp = X B C D */
27899 ix86_expand_vector_set (false, tmp, val, 0);
27900 /* target = A B X D */
27901 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27902 GEN_INT (0), GEN_INT (1),
27903 GEN_INT (2+4), GEN_INT (0+4)));
27907 gcc_unreachable ();
27912 use_vec_merge = TARGET_SSE4_1;
27916 /* Element 0 handled by vec_merge below. */
27919 use_vec_merge = true;
27925 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27926 store into element 0, then shuffle them back. */
27930 order[0] = GEN_INT (elt);
27931 order[1] = const1_rtx;
27932 order[2] = const2_rtx;
27933 order[3] = GEN_INT (3);
27934 order[elt] = const0_rtx;
27936 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27937 order[1], order[2], order[3]));
27939 ix86_expand_vector_set (false, target, val, 0);
/* Same permutation again swaps elements 0 and ELT back.  */
27941 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27942 order[1], order[2], order[3]));
27946 /* For SSE1, we have to reuse the V4SF code. */
27947 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27948 gen_lowpart (SFmode, val), elt);
27953 use_vec_merge = TARGET_SSE2;
27956 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27960 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: operate on the 128-bit half containing ELT.  */
27967 half_mode = V16QImode;
27973 half_mode = V8HImode;
27979 half_mode = V4SImode;
27985 half_mode = V2DImode;
27991 half_mode = V4SFmode;
27997 half_mode = V2DFmode;
28003 /* Compute offset. */
28007 gcc_assert (i <= 1);
28009 /* Extract the half. */
28010 tmp = gen_reg_rtx (half_mode);
28011 emit_insn ((*gen_extract[j][i]) (tmp, target));
28013 /* Put val in tmp at elt. */
28014 ix86_expand_vector_set (false, tmp, val, elt);
/* Write the modified half back into TARGET.  */
28017 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Shared vec_merge path: duplicate VAL and merge it into TARGET under
   a one-hot lane mask selecting ELT.  */
28026 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28027 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28028 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the element, reload.  */
28032 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28034 emit_move_insn (mem, target);
28036 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28037 emit_move_insn (tmp, val);
28039 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses a
   direct vec_select where the ISA supports it (use_vec_extr), else a
   shuffle to move the element into lane 0 first, else a stack-memory
   round trip.  MMX extraction is allowed only if MMX_OK.  */
28044 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28046 enum machine_mode mode = GET_MODE (vec);
28047 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28048 bool use_vec_extr = false;
28061 use_vec_extr = true;
28065 use_vec_extr = TARGET_SSE4_1;
/* V4SF: broadcast the wanted lane to lane 0 with shufps, or use
   unpckhps for the high half.  */
28077 tmp = gen_reg_rtx (mode);
28078 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28079 GEN_INT (elt), GEN_INT (elt),
28080 GEN_INT (elt+4), GEN_INT (elt+4)));
28084 tmp = gen_reg_rtx (mode);
28085 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28089 gcc_unreachable ();
28092 use_vec_extr = true;
28097 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd broadcasts the wanted lane; punpckhdq for the high
   half.  */
28111 tmp = gen_reg_rtx (mode);
28112 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28113 GEN_INT (elt), GEN_INT (elt),
28114 GEN_INT (elt), GEN_INT (elt)));
28118 tmp = gen_reg_rtx (mode);
28119 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28123 gcc_unreachable ();
28126 use_vec_extr = true;
28131 /* For SSE1, we have to reuse the V4SF code. */
28132 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28133 gen_lowpart (V4SFmode, vec), elt);
28139 use_vec_extr = TARGET_SSE2;
28142 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28146 use_vec_extr = TARGET_SSE4_1;
28150 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct path: emit a VEC_SELECT of the wanted element.  */
28157 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28158 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28160 /* Let the rtl optimizers know about the zero extension performed. */
28161 if (inner_mode == QImode || inner_mode == HImode)
28163 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28164 target = gen_lowpart (SImode, target);
28167 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to the stack and load one element.  */
28171 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28173 emit_move_insn (mem, vec);
28175 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28176 emit_move_insn (target, tmp);
28180 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28181 pattern to reduce; DEST is the destination; IN is the input vector. */
28184 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28186 rtx tmp1, tmp2, tmp3;
28188 tmp1 = gen_reg_rtx (V4SFmode);
28189 tmp2 = gen_reg_rtx (V4SFmode);
28190 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold high pair onto low pair: tmp1 = { in[2], in[3], ... }, then
   tmp2 = fn (tmp1, in) combines lanes (0,2) and (1,3).  */
28192 emit_insn (gen_sse_movhlps (tmp1, in, in));
28193 emit_insn (fn (tmp2, tmp1, in));
/* Bring tmp2 lane 1 down to lane 0 and combine once more, leaving the
   full reduction in DEST's low lane.  */
28195 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28196 GEN_INT (1), GEN_INT (1),
28197 GEN_INT (1+4), GEN_INT (1+4)));
28198 emit_insn (fn (dest, tmp2, tmp3));
28201 /* Target hook for scalar_mode_supported_p.  Accepts decimal float
   modes and TFmode specially; everything else defers to the generic
   default.  */
28203 ix86_scalar_mode_supported_p (enum machine_mode mode)
28205 if (DECIMAL_FLOAT_MODE_P (mode))
28207 else if (mode == TFmode)
28210 return default_scalar_mode_supported_p (mode);
28213 /* Implements target hook vector_mode_supported_p.  A vector mode is
   supported when the corresponding ISA extension is enabled and the
   mode is valid for that register class.  */
28215 ix86_vector_mode_supported_p (enum machine_mode mode)
28217 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28219 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28221 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28223 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28225 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28230 /* Target hook for c_mode_for_suffix.  Maps a constant-suffix
   character to a machine mode; NOTE(review): the body is not visible
   here — presumably 'q'/'w' style suffixes map to TF/XF modes, confirm
   against the full source.  */
28231 static enum machine_mode
28232 ix86_c_mode_for_suffix (char suffix)
28242 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28244 We do this in the new i386 backend to maintain source compatibility
28245 with the old cc0-based compiler.  Implicitly adds "flags" and "fpsr"
   to the clobber list of every asm statement.  */
28248 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28249 tree inputs ATTRIBUTE_UNUSED,
28252 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28254 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28259 /* Implements target vector targetm.asm.encode_section_info. This
28260 is not used by netware.  Performs the default encoding, then marks
   static/external variables placed in the large data section so that
   far addressing is used for them.  */
28262 static void ATTRIBUTE_UNUSED
28263 ix86_encode_section_info (tree decl, rtx rtl, int first)
28265 default_encode_section_info (decl, rtl, first);
28267 if (TREE_CODE (decl) == VAR_DECL
28268 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28269 && ix86_in_large_data_p (decl))
28270 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28273 /* Worker function for REVERSE_CONDITION.  FP compare modes must use
   the maybe-unordered reversal, since plain reversal is wrong in the
   presence of NaNs.  */
28276 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28278 return (mode != CCFPmode && mode != CCFPUmode
28279 ? reverse_condition (code)
28280 : reverse_condition_maybe_unordered (code));
28283 /* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template string; chooses a
   popping store (fstp) when the source register dies in this insn,
   a non-popping one otherwise.  */
28287 output_387_reg_move (rtx insn, rtx *operands)
28289 if (REG_P (operands[0]))
28291 if (REG_P (operands[1])
28292 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28294 if (REGNO (operands[0]) == FIRST_STACK_REG)
28295 return output_387_ffreep (operands, 0);
28296 return "fstp\t%y0";
28298 if (STACK_TOP_P (operands[0]))
28299 return "fld%z1\t%y1";
28302 else if (MEM_P (operands[0]))
28304 gcc_assert (REG_P (operands[1]));
28305 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28306 return "fstp%z0\t%y0";
28309 /* There is no non-popping store to memory for XFmode.
28310 So if we need one, follow the store with a load. */
28311 if (GET_MODE (operands[0]) == XFmode)
28312 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28314 return "fst%z0\t%y0";
28321 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28322 FP status register is set.  Reads the x87 status word with fnstsw;
   with SAHF available transfers it to EFLAGS, otherwise tests the C2
   bit (0x04) directly.  The branch is predicted not-taken (10%).  */
28325 ix86_emit_fp_unordered_jump (rtx label)
28327 rtx reg = gen_reg_rtx (HImode);
28330 emit_insn (gen_x86_fnstsw_1 (reg));
28332 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28334 emit_insn (gen_x86_sahf_1 (reg));
28336 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28337 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28341 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28343 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28344 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28347 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28348 gen_rtx_LABEL_REF (VOIDmode, label),
28350 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28352 emit_jump_insn (temp);
28353 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28356 /* Output code to perform a log1p XFmode calculation.  For |op1|
   below 1 - sqrt(2)/2 uses fyl2xp1 directly (better accuracy near 0);
   otherwise computes log2(1 + op1) via fyl2x.  Both are scaled by
   ln(2) (fldln2) to yield the natural log.  */
28358 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28360 rtx label1 = gen_label_rtx ();
28361 rtx label2 = gen_label_rtx ();
28363 rtx tmp = gen_reg_rtx (XFmode);
28364 rtx tmp2 = gen_reg_rtx (XFmode);
28366 emit_insn (gen_absxf2 (tmp, op1));
28367 emit_insn (gen_cmpxf (tmp,
28368 CONST_DOUBLE_FROM_REAL_VALUE (
/* 0.2928... = 1 - sqrt(2)/2, the accuracy threshold for fyl2xp1.  */
28369 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28371 emit_jump_insn (gen_bge (label1));
28373 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28374 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28375 emit_jump (label2);
28377 emit_label (label1);
28378 emit_move_insn (tmp, CONST1_RTX (XFmode));
28379 emit_insn (gen_addxf3 (tmp, op1, tmp));
28380 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28381 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28383 emit_label (label2);
28386 /* Output code to perform a Newton-Rhapson approximation of a single precision
28387 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].
   RES = A / B approximated with one refinement step of the hardware
   rcpss/rcpps estimate; MODE may be SFmode or an SF vector mode.  */
28389 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28391 rtx x0, x1, e0, e1, two;
28393 x0 = gen_reg_rtx (mode);
28394 e0 = gen_reg_rtx (mode);
28395 e1 = gen_reg_rtx (mode);
28396 x1 = gen_reg_rtx (mode);
28398 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28400 if (VECTOR_MODE_P (mode))
28401 two = ix86_build_const_vector (SFmode, true, two);
28403 two = force_reg (mode, two);
28405 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28407 /* x0 = rcp(b) estimate */
28408 emit_insn (gen_rtx_SET (VOIDmode, x0,
28409 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28412 emit_insn (gen_rtx_SET (VOIDmode, e0,
28413 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28415 emit_insn (gen_rtx_SET (VOIDmode, e1,
28416 gen_rtx_MINUS (mode, two, e0)));
/* x1 = refined reciprocal = x0 * e1 */
28418 emit_insn (gen_rtx_SET (VOIDmode, x1,
28419 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28421 emit_insn (gen_rtx_SET (VOIDmode, res,
28422 gen_rtx_MULT (mode, a, x1)));
28425 /* Output code to perform a Newton-Rhapson approximation of a
28426 single precision floating point [reciprocal] square root.  Refines
   the rsqrtss/rsqrtps hardware estimate with one Newton step; emits
   either sqrt(a) or rsqrt(a) into RES (selected by the elided
   recip/!recip parameter).  */
28428 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28431 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28434 x0 = gen_reg_rtx (mode);
28435 e0 = gen_reg_rtx (mode);
28436 e1 = gen_reg_rtx (mode);
28437 e2 = gen_reg_rtx (mode);
28438 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
28440 real_from_integer (&r, VOIDmode, -3, -1, 0);
28441 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28443 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28444 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28446 if (VECTOR_MODE_P (mode))
28448 mthree = ix86_build_const_vector (SFmode, true, mthree);
28449 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28452 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28453 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28455 /* x0 = rsqrt(a) estimate */
28456 emit_insn (gen_rtx_SET (VOIDmode, x0,
28457 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28460 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28465 zero = gen_reg_rtx (mode);
28466 mask = gen_reg_rtx (mode);
/* mask = (a != 0), so x0 &= mask zeroes the estimate where a == 0.  */
28468 zero = force_reg (mode, CONST0_RTX(mode));
28469 emit_insn (gen_rtx_SET (VOIDmode, mask,
28470 gen_rtx_NE (mode, zero, a)));
28472 emit_insn (gen_rtx_SET (VOIDmode, x0,
28473 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
28477 emit_insn (gen_rtx_SET (VOIDmode, e0,
28478 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 = a * x0^2 */
28480 emit_insn (gen_rtx_SET (VOIDmode, e1,
28481 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
28484 mthree = force_reg (mode, mthree);
28485 emit_insn (gen_rtx_SET (VOIDmode, e2,
28486 gen_rtx_PLUS (mode, e1, mthree)));
28488 mhalf = force_reg (mode, mhalf);
28490 /* e3 = -.5 * x0 */
28491 emit_insn (gen_rtx_SET (VOIDmode, e3,
28492 gen_rtx_MULT (mode, x0, mhalf)));
28494 /* e3 = -.5 * e0 */
28495 emit_insn (gen_rtx_SET (VOIDmode, e3,
28496 gen_rtx_MULT (mode, e0, mhalf)));
28497 /* ret = e2 * e3 */
28498 emit_insn (gen_rtx_SET (VOIDmode, res,
28499 gen_rtx_MULT (mode, e2, e3)));
28502 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  Identical to
   the default ELF handling except for the .eh_frame quirk below.  */
28504 static void ATTRIBUTE_UNUSED
28505 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28508 /* With Binutils 2.15, the "@unwind" marker must be specified on
28509 every occurrence of the ".eh_frame" section, not just the first
28512 && strcmp (name, ".eh_frame") == 0)
28514 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28515 flags & SECTION_WRITE ? "aw" : "a");
28518 default_elf_asm_named_section (name, flags, decl);
28521 /* Return the mangling of TYPE if it is an extended fundamental type.
   Returns NULL (in elided code) for types that use the default
   mangling; only void/bool/integer/real candidates are considered.  */
28523 static const char *
28524 ix86_mangle_type (const_tree type)
28526 type = TYPE_MAIN_VARIANT (type);
28528 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28529 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28532 switch (TYPE_MODE (type))
28535 /* __float128 is "g". */
28538 /* "long double" or __float80 is "e". */
28545 /* For 32-bit code we can save PIC register setup by using
28546 __stack_chk_fail_local hidden function instead of calling
28547 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28548 register, so it is better to call __stack_chk_fail directly. */
28551 ix86_stack_protect_fail (void)
28553 return TARGET_64BIT
28554 ? default_external_stack_protect_fail ()
28555 : default_hidden_stack_protect_fail ();
28558 /* Select a format to encode pointers in exception handling data. CODE
28559 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28560 true if the symbol may be affected by dynamic relocations.
28562 ??? All x86 object file formats are capable of representing this.
28563 After all, the relocation needed is the same as for the call insn.
28564 Whether or not a particular assembler allows us to enter such, I
28565 guess we'll have to see. */
28567 asm_preferred_eh_data_format (int code, int global)
/* PIC (elided condition): use pc-relative sdata, 4-byte for small
   code models, 8-byte otherwise; indirect when dynamic relocations
   may apply.  */
28571 int type = DW_EH_PE_sdata8;
28573 || ix86_cmodel == CM_SMALL_PIC
28574 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28575 type = DW_EH_PE_sdata4;
28576 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, 4-byte where the code model permits.  */
28578 if (ix86_cmodel == CM_SMALL
28579 || (ix86_cmodel == CM_MEDIUM && code))
28580 return DW_EH_PE_udata4;
28581 return DW_EH_PE_absptr;
28584 /* Expand copysign from SIGN to the positive value ABS_VALUE
28585 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
   the sign bit; when null a fresh sign-bit mask is built for MODE.  */
28588 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28590 enum machine_mode mode = GET_MODE (sign);
28591 rtx sgn = gen_reg_rtx (mode);
28592 if (mask == NULL_RTX)
28594 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28595 if (!VECTOR_MODE_P (mode))
28597 /* We need to generate a scalar mode mask in this case. */
28598 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28599 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28600 mask = gen_reg_rtx (mode);
28601 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK selects the magnitude bits, so invert it to
   isolate the sign bit of SIGN.  */
28605 mask = gen_rtx_NOT (mode, mask);
28606 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28607 gen_rtx_AND (mode, mask, sign)));
/* result = abs_value | (sign & sign-bit) — abs_value is assumed
   positive so OR installs the sign.  */
28608 emit_insn (gen_rtx_SET (VOIDmode, result,
28609 gen_rtx_IOR (mode, abs_value, sgn)));
28612 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28613 mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null (store is in elided code), so callers can reuse it for a
   later copysign.  */
28616 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28618 enum machine_mode mode = GET_MODE (op0);
28621 xa = gen_reg_rtx (mode);
28622 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28623 if (!VECTOR_MODE_P (mode))
28625 /* We need to generate a scalar mode mask in this case. */
28626 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28627 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28628 mask = gen_reg_rtx (mode);
28629 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & ~signbit — clears the sign bit, i.e. fabs.  */
28631 emit_insn (gen_rtx_SET (VOIDmode, xa,
28632 gen_rtx_AND (mode, op0, mask)));
28640 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28641 swapping the operands if SWAP_OPERANDS is true. The expanded
28642 code is a forward jump to a newly created label in case the
28643 comparison is true. The generated label rtx is returned. */
28645 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28646 bool swap_operands)
/* CCFPUmode: unordered FP compare so NaN operands take the branch for
   UN* codes.  */
28657 label = gen_label_rtx ();
28658 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28659 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28660 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28661 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28662 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28663 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28664 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28665 JUMP_LABEL (tmp) = label;
28670 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28671 using comparison code CODE. Operands are swapped for the comparison if
28672 SWAP_OPERANDS is true. Returns a rtx for the generated mask
   (all-ones where the comparison holds, all-zeros otherwise, as
   produced by cmpsd/cmpss).  */
28674 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28675 bool swap_operands)
28677 enum machine_mode mode = GET_MODE (op0);
28678 rtx mask = gen_reg_rtx (mode);
28687 if (mode == DFmode)
28688 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28689 gen_rtx_fmt_ee (code, mode, op0, op1)));
28691 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28692 gen_rtx_fmt_ee (code, mode, op0, op1)));
28697 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28698 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.
   2^52 (resp. 2^23) is the smallest value at which every representable
   double (float) is already an integer — the pivot used by the SSE
   rounding expanders below.  */
28700 ix86_gen_TWO52 (enum machine_mode mode)
28702 REAL_VALUE_TYPE TWO52r;
28705 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28706 TWO52 = const_double_from_real_value (TWO52r, mode);
28707 TWO52 = force_reg (mode, TWO52);
28712 /* Expand SSE sequence for computing lround from OP1 storing
   into OP0 (an integer-mode register).  */
28715 ix86_expand_lround (rtx op0, rtx op1)
28717 /* C code for the stuff we're doing below:
28718 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
   (the predecessor of 0.5 avoids 0.49999... rounding up under the
   subsequent truncating conversion)  */
28721 enum machine_mode mode = GET_MODE (op1);
28722 const struct real_format *fmt;
28723 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28726 /* load nextafter (0.5, 0.0) */
28727 fmt = REAL_MODE_FORMAT (mode);
28728 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28729 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28731 /* adj = copysign (0.5, op1) */
28732 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28733 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28735 /* adj = op1 + adj */
28736 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28738 /* op0 = (imode)adj */
28739 expand_fix (op0, adj, 0);
28742 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0: lfloor when DO_FLOOR, lceil otherwise.  Converts,
   then compensates by +/-1 when the round-trip overshot.  */
28745 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28747 /* C code for the stuff we're doing below (for do_floor):
28749 xi -= (double)xi > op1 ? 1 : 0;
   */
28752 enum machine_mode fmode = GET_MODE (op1);
28753 enum machine_mode imode = GET_MODE (op0);
28754 rtx ireg, freg, label, tmp;
28756 /* reg = (long)op1 */
28757 ireg = gen_reg_rtx (imode);
28758 expand_fix (ireg, op1, 0);
28760 /* freg = (double)reg */
28761 freg = gen_reg_rtx (fmode);
28762 expand_float (freg, ireg, 0);
28764 /* ireg = (freg > op1) ? ireg - 1 : ireg */
28765 label = ix86_expand_sse_compare_and_jump (UNLE,
28766 freg, op1, !do_floor);
28767 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28768 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28769 emit_move_insn (ireg, tmp);
28771 emit_label (label);
28772 LABEL_NUSES (label) = 1;
28774 emit_move_insn (op0, ireg);
28777 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28778 result in OPERAND0.  Uses the classic add-then-subtract-2^52 trick,
   skipped entirely for magnitudes already >= 2^52 (or NaN).  */
28780 ix86_expand_rint (rtx operand0, rtx operand1)
28782 /* C code for the stuff we're doing below:
28783 xa = fabs (operand1);
28784 if (!isless (xa, 2**52))
28786 xa = xa + 2**52 - 2**52;
28787 return copysign (xa, operand1);
   */
28789 enum machine_mode mode = GET_MODE (operand0);
28790 rtx res, xa, label, TWO52, mask;
28792 res = gen_reg_rtx (mode);
28793 emit_move_insn (res, operand1);
28795 /* xa = abs (operand1) */
28796 xa = ix86_expand_sse_fabs (res, &mask);
28798 /* if (!isless (xa, TWO52)) goto label; */
28799 TWO52 = ix86_gen_TWO52 (mode);
28800 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28802 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28803 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0 and negative results).  */
28805 ix86_sse_copysign_to_positive (res, xa, res, mask);
28807 emit_label (label);
28808 LABEL_NUSES (label) = 1;
28810 emit_move_insn (operand0, res);
28813 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0: floor when DO_FLOOR, ceil otherwise.  32-bit-safe
   variant that avoids DImode fix/float by using the 2^52 trick.  */
28816 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28818 /* C code for the stuff we expand below.
28819 double xa = fabs (x), x2;
28820 if (!isless (xa, TWO52))
28822 xa = xa + TWO52 - TWO52;
28823 x2 = copysign (xa, x);
   (elided: then compensate by +/-1 where rounding went the wrong
   way, and return x2)  */
28832 enum machine_mode mode = GET_MODE (operand0);
28833 rtx xa, TWO52, tmp, label, one, res, mask;
28835 TWO52 = ix86_gen_TWO52 (mode);
28837 /* Temporary for holding the result, initialized to the input
28838 operand to ease control flow. */
28839 res = gen_reg_rtx (mode);
28840 emit_move_insn (res, operand1);
28842 /* xa = abs (operand1) */
28843 xa = ix86_expand_sse_fabs (res, &mask);
28845 /* if (!isless (xa, TWO52)) goto label; */
28846 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28848 /* xa = xa + TWO52 - TWO52; */
28849 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28850 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28852 /* xa = copysign (xa, operand1) */
28853 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28855 /* generate 1.0 or -1.0 */
28856 one = force_reg (mode,
28857 const_double_from_real_value (do_floor
28858 ? dconst1 : dconstm1, mode));
28860 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28861 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28862 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28863 gen_rtx_AND (mode, one, tmp)));
28864 /* We always need to subtract here to preserve signed zero. */
28865 tmp = expand_simple_binop (mode, MINUS,
28866 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28867 emit_move_insn (res, tmp);
28869 emit_label (label);
28870 LABEL_NUSES (label) = 1;
28872 emit_move_insn (operand0, res);
28875 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0: floor when DO_FLOOR, ceil otherwise.  This variant
   truncates via an integer round-trip (needs DImode cvttsd2siq for
   DFmode, i.e. 64-bit targets) and then compensates by 1.  */
28878 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28880 /* C code for the stuff we expand below.
28881 double xa = fabs (x), x2;
28882 if (!isless (xa, TWO52))
28884 x2 = (double)(long)x;
28891 if (HONOR_SIGNED_ZEROS (mode))
28892 return copysign (x2, x);
   */
28895 enum machine_mode mode = GET_MODE (operand0);
28896 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28898 TWO52 = ix86_gen_TWO52 (mode);
28900 /* Temporary for holding the result, initialized to the input
28901 operand to ease control flow. */
28902 res = gen_reg_rtx (mode);
28903 emit_move_insn (res, operand1);
28905 /* xa = abs (operand1) */
28906 xa = ix86_expand_sse_fabs (res, &mask);
28908 /* if (!isless (xa, TWO52)) goto label; */
28909 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28911 /* xa = (double)(long)x */
28912 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28913 expand_fix (xi, res, 0);
28914 expand_float (xa, xi, 0);
/* generate 1.0 */
28917 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28919 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28920 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28921 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28922 gen_rtx_AND (mode, one, tmp)));
28923 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28924 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28925 emit_move_insn (res, tmp);
28927 if (HONOR_SIGNED_ZEROS (mode))
28928 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28930 emit_label (label);
28931 LABEL_NUSES (label) = 1;
28933 emit_move_insn (operand0, res);
28936 /* Expand SSE sequence for computing round from OPERAND1 storing
28937 into OPERAND0. Sequence that works without relying on DImode truncation
28938 via cvttsd2siq that is only available on 64bit targets. */
28940 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28942 /* C code for the stuff we expand below.
28943 double xa = fabs (x), xa2, x2;
28944 if (!isless (xa, TWO52))
28946 Using the absolute value and copying back sign makes
28947 -0.0 -> -0.0 correct.
28948 xa2 = xa + TWO52 - TWO52;
28953 else if (dxa > 0.5)
28955 x2 = copysign (xa2, x);
   */
28958 enum machine_mode mode = GET_MODE (operand0);
28959 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28961 TWO52 = ix86_gen_TWO52 (mode);
28963 /* Temporary for holding the result, initialized to the input
28964 operand to ease control flow. */
28965 res = gen_reg_rtx (mode);
28966 emit_move_insn (res, operand1);
28968 /* xa = abs (operand1) */
28969 xa = ix86_expand_sse_fabs (res, &mask);
28971 /* if (!isless (xa, TWO52)) goto label; */
28972 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28974 /* xa2 = xa + TWO52 - TWO52; */
28975 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28976 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28978 /* dxa = xa2 - xa; */
28979 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28981 /* generate 0.5, 1.0 and -0.5 */
28982 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28983 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28984 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28988 tmp = gen_reg_rtx (mode);
28989 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28990 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28991 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28992 gen_rtx_AND (mode, one, tmp)));
28993 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28994 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28995 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28996 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28997 gen_rtx_AND (mode, one, tmp)));
28998 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29000 /* res = copysign (xa2, operand1) */
29001 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29003 emit_label (label);
29004 LABEL_NUSES (label) = 1;
29006 emit_move_insn (operand0, res);
29009 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, via an integer round-trip (truncating fix, then
   float back).  */
29012 ix86_expand_trunc (rtx operand0, rtx operand1)
29014 /* C code for SSE variant we expand below.
29015 double xa = fabs (x), x2;
29016 if (!isless (xa, TWO52))
29018 x2 = (double)(long)x;
29019 if (HONOR_SIGNED_ZEROS (mode))
29020 return copysign (x2, x);
   */
29023 enum machine_mode mode = GET_MODE (operand0);
29024 rtx xa, xi, TWO52, label, res, mask;
29026 TWO52 = ix86_gen_TWO52 (mode);
29028 /* Temporary for holding the result, initialized to the input
29029 operand to ease control flow. */
29030 res = gen_reg_rtx (mode);
29031 emit_move_insn (res, operand1);
29033 /* xa = abs (operand1) */
29034 xa = ix86_expand_sse_fabs (res, &mask);
29036 /* if (!isless (xa, TWO52)) goto label; */
29037 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29039 /* x = (double)(long)x */
29040 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29041 expand_fix (xi, res, 0);
29042 expand_float (res, xi, 0);
29044 if (HONOR_SIGNED_ZEROS (mode))
29045 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29047 emit_label (label);
29048 LABEL_NUSES (label) = 1;
29050 emit_move_insn (operand0, res);
29053 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  32-bit-safe DFmode variant: avoids the DImode
   integer round-trip by using the 2^52 trick plus compensation.  */
29056 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29058 enum machine_mode mode = GET_MODE (operand0);
29059 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29061 /* C code for SSE variant we expand below.
29062 double xa = fabs (x), x2;
29063 if (!isless (xa, TWO52))
29065 xa2 = xa + TWO52 - TWO52;
29069 x2 = copysign (xa2, x);
   */
29073 TWO52 = ix86_gen_TWO52 (mode);
29075 /* Temporary for holding the result, initialized to the input
29076 operand to ease control flow. */
29077 res = gen_reg_rtx (mode);
29078 emit_move_insn (res, operand1);
29080 /* xa = abs (operand1) */
29081 xa = ix86_expand_sse_fabs (res, &smask);
29083 /* if (!isless (xa, TWO52)) goto label; */
29084 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29086 /* res = xa + TWO52 - TWO52; */
29087 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29088 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29089 emit_move_insn (res, tmp);
/* generate 1.0 */
29092 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29094 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29095 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29096 emit_insn (gen_rtx_SET (VOIDmode, mask,
29097 gen_rtx_AND (mode, mask, one)));
29098 tmp = expand_simple_binop (mode, MINUS,
29099 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29100 emit_move_insn (res, tmp);
29102 /* res = copysign (res, operand1) */
29103 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29105 emit_label (label);
29106 LABEL_NUSES (label) = 1;
29108 emit_move_insn (operand0, res);
29111 /* Expand SSE sequence for computing round from OPERAND1 storing
/* NOTE(review): rounds to nearest with halfway cases away from zero, by
   adding nextafter (0.5, 0.0) to |x| and truncating via integer
   conversion, then restoring the sign.  */
29114 ix86_expand_round (rtx operand0, rtx operand1)
29116 /* C code for the stuff we're doing below:
29117 double xa = fabs (x);
29118 if (!isless (xa, TWO52))
29120 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29121 return copysign (xa, x);
29123 enum machine_mode mode = GET_MODE (operand0);
29124 rtx res, TWO52, xa, label, xi, half, mask;
29125 const struct real_format *fmt;
29126 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29128 /* Temporary for holding the result, initialized to the input
29129 operand to ease control flow. */
29130 res = gen_reg_rtx (mode);
29131 emit_move_insn (res, operand1);
/* Values >= 2**52 are already integral; skip the computation for them.  */
29133 TWO52 = ix86_gen_TWO52 (mode);
29134 xa = ix86_expand_sse_fabs (res, &mask);
29135 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* nextafter (0.5, 0.0) == 0.5 - 2**(-p-1) for a p-bit significand;
   using the predecessor of 0.5 avoids double rounding on x + 0.5.  */
29137 /* load nextafter (0.5, 0.0) */
29138 fmt = REAL_MODE_FORMAT (mode);
29139 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29140 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29142 /* xa = xa + 0.5 */
29143 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29144 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
/* Truncate via fix/float round trip; DImode for double, SImode for float.  */
29146 /* xa = (double)(int64_t)xa */
29147 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29148 expand_fix (xi, xa, 0);
29149 expand_float (xa, xi, 0);
29151 /* res = copysign (xa, operand1) */
29152 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29154 emit_label (label);
29155 LABEL_NUSES (label) = 1;
29157 emit_move_insn (operand0, res);
29161 /* Validate whether a SSE5 instruction is valid or not.
29162 OPERANDS is the array of operands.
29163 NUM is the number of operands.
29164 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29165 NUM_MEMORY is the maximum number of memory operands to accept.
29166 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29169 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29170 bool uses_oc0, int num_memory, bool commutative)
29176 /* Count the number of memory arguments */
29179 for (i = 0; i < num; i++)
29181 enum machine_mode mode = GET_MODE (operands[i]);
29182 if (register_operand (operands[i], mode))
29185 else if (memory_operand (operands[i], mode))
29187 mem_mask |= (1 << i);
29193 rtx pattern = PATTERN (insn);
29195 /* allow 0 for pcmov */
29196 if (GET_CODE (pattern) != SET
29197 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29199 || operands[i] != CONST0_RTX (mode))
29204 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29205 a memory operation. */
29206 if (num_memory < 0)
29208 num_memory = -num_memory;
29209 if ((mem_mask & (1 << (num-1))) != 0)
29211 mem_mask &= ~(1 << (num-1));
29216 /* If there were no memory operations, allow the insn */
29220 /* Do not allow the destination register to be a memory operand. */
29221 else if (mem_mask & (1 << 0))
29224 /* If there are too many memory operations, disallow the instruction. While
29225 the hardware only allows 1 memory reference, before register allocation
29226 for some insns, we allow two memory operations sometimes in order to allow
29227 code like the following to be optimized:
29229 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29231 or similar cases that are vectorized into using the fmaddss
29233 else if (mem_count > num_memory)
29236 /* Don't allow more than one memory operation if not optimizing. */
29237 else if (mem_count > 1 && !optimize)
29240 else if (num == 4 && mem_count == 1)
29242 /* formats (destination is the first argument), example fmaddss:
29243 xmm1, xmm1, xmm2, xmm3/mem
29244 xmm1, xmm1, xmm2/mem, xmm3
29245 xmm1, xmm2, xmm3/mem, xmm1
29246 xmm1, xmm2/mem, xmm3, xmm1 */
29248 return ((mem_mask == (1 << 1))
29249 || (mem_mask == (1 << 2))
29250 || (mem_mask == (1 << 3)));
29252 /* format, example pmacsdd:
29253 xmm1, xmm2, xmm3/mem, xmm1 */
29255 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29257 return (mem_mask == (1 << 2));
29260 else if (num == 4 && num_memory == 2)
29262 /* If there are two memory operations, we can load one of the memory ops
29263 into the destination register. This is for optimizing the
29264 multiply/add ops, which the combiner has optimized both the multiply
29265 and the add insns to have a memory operation. We have to be careful
29266 that the destination doesn't overlap with the inputs. */
29267 rtx op0 = operands[0];
29269 if (reg_mentioned_p (op0, operands[1])
29270 || reg_mentioned_p (op0, operands[2])
29271 || reg_mentioned_p (op0, operands[3]))
29274 /* formats (destination is the first argument), example fmaddss:
29275 xmm1, xmm1, xmm2, xmm3/mem
29276 xmm1, xmm1, xmm2/mem, xmm3
29277 xmm1, xmm2, xmm3/mem, xmm1
29278 xmm1, xmm2/mem, xmm3, xmm1
29280 For the oc0 case, we will load either operands[1] or operands[3] into
29281 operands[0], so any combination of 2 memory operands is ok. */
29285 /* format, example pmacsdd:
29286 xmm1, xmm2, xmm3/mem, xmm1
29288 For the integer multiply/add instructions be more restrictive and
29289 require operands[2] and operands[3] to be the memory operands. */
29291 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29293 return (mem_mask == ((1 << 2) | (1 << 3)));
29296 else if (num == 3 && num_memory == 1)
29298 /* formats, example protb:
29299 xmm1, xmm2, xmm3/mem
29300 xmm1, xmm2/mem, xmm3 */
29302 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29304 /* format, example comeq:
29305 xmm1, xmm2, xmm3/mem */
29307 return (mem_mask == (1 << 2));
29311 gcc_unreachable ();
29317 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29318 hardware will allow by using the destination register to load one of the
29319 memory operations. Presently this is used by the multiply/add routines to
29320 allow 2 memory references. */
29323 ix86_expand_sse5_multiple_memory (rtx operands[],
29325 enum machine_mode mode)
29327 rtx op0 = operands[0];
/* Sanity check: the destination must be a register that does not overlap
   any input; otherwise the move below would clobber a live operand.  */
29329 || memory_operand (op0, mode)
29330 || reg_mentioned_p (op0, operands[1])
29331 || reg_mentioned_p (op0, operands[2])
29332 || reg_mentioned_p (op0, operands[3]))
29333 gcc_unreachable ();
29335 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29336 the destination register. */
29337 if (memory_operand (operands[1], mode))
29339 emit_move_insn (op0, operands[1]);
29342 else if (memory_operand (operands[3], mode))
29344 emit_move_insn (op0, operands[3]);
/* Reached only if neither operand was a memory reference, which the
   caller's contract (2 memory inputs) rules out.  */
29348 gcc_unreachable ();
29354 /* Table of valid machine attributes. */
29355 static const struct attribute_spec ix86_attribute_table[] =
29357 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29358 /* Stdcall attribute says callee is responsible for popping arguments
29359 if they are not variable. */
29360 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29361 /* Fastcall attribute says callee is responsible for popping arguments
29362 if they are not variable. */
29363 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29364 /* Cdecl attribute says the callee is a normal C declaration */
29365 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29366 /* Regparm attribute specifies how many integer arguments are to be
29367 passed in registers. */
29368 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29369 /* Sseregparm attribute says we are using x86_64 calling conventions
29370 for FP arguments. */
29371 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29372 /* force_align_arg_pointer says this function realigns the stack at entry. */
29373 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29374 false, true, true, ix86_handle_cconv_attribute },
/* DLL import/export attributes are only meaningful on PE targets.  */
29375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29376 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29377 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29378 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29380 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29381 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29382 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29383 SUBTARGET_ATTRIBUTE_TABLE,
29385 /* ms_abi and sysv_abi calling convention function attributes. */
29386 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29387 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* All-NULL sentinel entry terminates the table.  */
29389 { NULL, 0, 0, false, false, false, NULL }
29392 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29394 x86_builtin_vectorization_cost (bool runtime_test)
29396 /* If the branch of the runtime test is taken - i.e. - the vectorized
29397 version is skipped - this incurs a misprediction cost (because the
29398 vectorized version is expected to be the fall-through). So we subtract
29399 the latency of a mispredicted branch from the costs that are incured
29400 when the vectorized version is executed.
29402 TODO: The values in individual target tables have to be tuned or new
29403 fields may be needed. For eg. on K8, the default branch path is the
29404 not-taken path. If the taken path is predicted correctly, the minimum
29405 penalty of going down the taken-path is 1 cycle. If the taken-path is
29406 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative value: a credit against the vectorized-version cost, sized by
   the per-CPU mispredicted-taken-branch latency from the cost tables.  */
29410 return (-(ix86_cost->cond_taken_branch_cost));
29416 /* This function returns the calling abi specific va_list type node.
29417 It returns the FNDECL specific va_list type. */
29420 ix86_fn_abi_va_list (tree fndecl)
/* 32-bit targets have only one va_list flavor; use the generic node.  */
29425 return va_list_type_node;
29426 gcc_assert (fndecl != NULL_TREE);
29427 abi = ix86_function_abi ((const_tree) fndecl);
/* Pick the MS or SysV va_list node according to the function's ABI.  */
29430 return ms_va_list_type_node;
29432 return sysv_va_list_type_node;
29435 /* Returns the canonical va_list type specified by TYPE. If there
29436 is no valid TYPE provided, it returns NULL_TREE. */
29439 ix86_canonical_va_list_type (tree type)
29443 /* Resolve references and pointers to va_list type. */
29444 if (INDIRECT_REF_P (type))
29445 type = TREE_TYPE (type);
29446 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29447 type = TREE_TYPE (type);
/* The same unwrap-and-compare sequence is repeated below for each of
   the three va_list flavors: generic, SysV, and MS.  */
29451 wtype = va_list_type_node;
29452 gcc_assert (wtype != NULL_TREE);
29454 if (TREE_CODE (wtype) == ARRAY_TYPE)
29456 /* If va_list is an array type, the argument may have decayed
29457 to a pointer type, e.g. by being passed to another function.
29458 In that case, unwrap both types so that we can compare the
29459 underlying records. */
29460 if (TREE_CODE (htype) == ARRAY_TYPE
29461 || POINTER_TYPE_P (htype))
29463 wtype = TREE_TYPE (wtype);
29464 htype = TREE_TYPE (htype);
29467 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29468 return va_list_type_node;
29469 wtype = sysv_va_list_type_node;
29470 gcc_assert (wtype != NULL_TREE);
29472 if (TREE_CODE (wtype) == ARRAY_TYPE)
29474 /* If va_list is an array type, the argument may have decayed
29475 to a pointer type, e.g. by being passed to another function.
29476 In that case, unwrap both types so that we can compare the
29477 underlying records. */
29478 if (TREE_CODE (htype) == ARRAY_TYPE
29479 || POINTER_TYPE_P (htype))
29481 wtype = TREE_TYPE (wtype);
29482 htype = TREE_TYPE (htype);
29485 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29486 return sysv_va_list_type_node;
29487 wtype = ms_va_list_type_node;
29488 gcc_assert (wtype != NULL_TREE);
29490 if (TREE_CODE (wtype) == ARRAY_TYPE)
29492 /* If va_list is an array type, the argument may have decayed
29493 to a pointer type, e.g. by being passed to another function.
29494 In that case, unwrap both types so that we can compare the
29495 underlying records. */
29496 if (TREE_CODE (htype) == ARRAY_TYPE
29497 || POINTER_TYPE_P (htype))
29499 wtype = TREE_TYPE (wtype);
29500 htype = TREE_TYPE (htype);
29503 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29504 return ms_va_list_type_node;
/* Not a target-specific va_list; defer to the language-independent
   canonicalization.  */
29507 return std_canonical_va_list_type (type);
29510 /* Iterate through the target-specific builtin types for va_list.
29511 IDX denotes the iterator, *PTREE is set to the result type of
29512 the va_list builtin, and *PNAME to its internal type.
29513 Returns zero if there is no element for this index, otherwise
29514 IDX should be increased upon the next call.
29515 Note, do not iterate a base builtin's name like __builtin_va_list.
29516 Used from c_common_nodes_and_builtins. */
29519 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Index 0: the Microsoft ABI va_list.  */
29525 *ptree = ms_va_list_type_node;
29526 *pname = "__builtin_ms_va_list";
/* Index 1: the System V ABI va_list.  */
29529 *ptree = sysv_va_list_type_node;
29530 *pname = "__builtin_sysv_va_list";
/* Each hook below is #undef'd then #define'd so that TARGET_INITIALIZER
   picks up the i386-specific implementation instead of the default.  */
29538 /* Initialize the GCC target structure. */
29539 #undef TARGET_RETURN_IN_MEMORY
29540 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29542 #undef TARGET_ATTRIBUTE_TABLE
29543 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29544 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29545 # undef TARGET_MERGE_DECL_ATTRIBUTES
29546 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29549 #undef TARGET_COMP_TYPE_ATTRIBUTES
29550 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29552 #undef TARGET_INIT_BUILTINS
29553 #define TARGET_INIT_BUILTINS ix86_init_builtins
29554 #undef TARGET_EXPAND_BUILTIN
29555 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29557 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29558 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29559 ix86_builtin_vectorized_function
29561 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29562 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29564 #undef TARGET_BUILTIN_RECIPROCAL
29565 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29567 #undef TARGET_ASM_FUNCTION_EPILOGUE
29568 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Subtargets (e.g. Darwin, PE) may override section-info encoding.  */
29570 #undef TARGET_ENCODE_SECTION_INFO
29571 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29572 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29574 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29577 #undef TARGET_ASM_OPEN_PAREN
29578 #define TARGET_ASM_OPEN_PAREN ""
29579 #undef TARGET_ASM_CLOSE_PAREN
29580 #define TARGET_ASM_CLOSE_PAREN ""
29582 #undef TARGET_ASM_ALIGNED_HI_OP
29583 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29584 #undef TARGET_ASM_ALIGNED_SI_OP
29585 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29587 #undef TARGET_ASM_ALIGNED_DI_OP
29588 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment restrictions on data, so unaligned output uses
   the same directives as aligned output.  */
29591 #undef TARGET_ASM_UNALIGNED_HI_OP
29592 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29593 #undef TARGET_ASM_UNALIGNED_SI_OP
29594 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29595 #undef TARGET_ASM_UNALIGNED_DI_OP
29596 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29598 #undef TARGET_SCHED_ADJUST_COST
29599 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29600 #undef TARGET_SCHED_ISSUE_RATE
29601 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29602 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29603 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29604 ia32_multipass_dfa_lookahead
29606 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29607 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29610 #undef TARGET_HAVE_TLS
29611 #define TARGET_HAVE_TLS true
29613 #undef TARGET_CANNOT_FORCE_CONST_MEM
29614 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29615 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29616 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29618 #undef TARGET_DELEGITIMIZE_ADDRESS
29619 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29621 #undef TARGET_MS_BITFIELD_LAYOUT_P
29622 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29625 #undef TARGET_BINDS_LOCAL_P
29626 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29628 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29629 #undef TARGET_BINDS_LOCAL_P
29630 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29633 #undef TARGET_ASM_OUTPUT_MI_THUNK
29634 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29635 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29636 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29638 #undef TARGET_ASM_FILE_START
29639 #define TARGET_ASM_FILE_START x86_file_start
29641 #undef TARGET_DEFAULT_TARGET_FLAGS
29642 #define TARGET_DEFAULT_TARGET_FLAGS \
29644 | TARGET_SUBTARGET_DEFAULT \
29645 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29647 #undef TARGET_HANDLE_OPTION
29648 #define TARGET_HANDLE_OPTION ix86_handle_option
29650 #undef TARGET_RTX_COSTS
29651 #define TARGET_RTX_COSTS ix86_rtx_costs
29652 #undef TARGET_ADDRESS_COST
29653 #define TARGET_ADDRESS_COST ix86_address_cost
29655 #undef TARGET_FIXED_CONDITION_CODE_REGS
29656 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29657 #undef TARGET_CC_MODES_COMPATIBLE
29658 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29660 #undef TARGET_MACHINE_DEPENDENT_REORG
29661 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29663 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29664 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29666 #undef TARGET_BUILD_BUILTIN_VA_LIST
29667 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29669 #undef TARGET_FN_ABI_VA_LIST
29670 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29672 #undef TARGET_CANONICAL_VA_LIST_TYPE
29673 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29675 #undef TARGET_EXPAND_BUILTIN_VA_START
29676 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29678 #undef TARGET_MD_ASM_CLOBBERS
29679 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29681 #undef TARGET_PROMOTE_PROTOTYPES
29682 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29683 #undef TARGET_STRUCT_VALUE_RTX
29684 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29685 #undef TARGET_SETUP_INCOMING_VARARGS
29686 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29687 #undef TARGET_MUST_PASS_IN_STACK
29688 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29689 #undef TARGET_PASS_BY_REFERENCE
29690 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29691 #undef TARGET_INTERNAL_ARG_POINTER
29692 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29693 #undef TARGET_UPDATE_STACK_BOUNDARY
29694 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29695 #undef TARGET_GET_DRAP_RTX
29696 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29697 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29698 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29699 #undef TARGET_STRICT_ARGUMENT_NAMING
29700 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29702 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29703 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29705 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29706 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29708 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29709 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29711 #undef TARGET_C_MODE_FOR_SUFFIX
29712 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29715 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29716 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29719 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29720 #undef TARGET_INSERT_ATTRIBUTES
29721 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29724 #undef TARGET_MANGLE_TYPE
29725 #define TARGET_MANGLE_TYPE ix86_mangle_type
29727 #undef TARGET_STACK_PROTECT_FAIL
29728 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29730 #undef TARGET_FUNCTION_VALUE
29731 #define TARGET_FUNCTION_VALUE ix86_function_value
29733 #undef TARGET_SECONDARY_RELOAD
29734 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29736 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29737 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29739 #undef TARGET_SET_CURRENT_FUNCTION
29740 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29742 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29743 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29745 #undef TARGET_OPTION_SAVE
29746 #define TARGET_OPTION_SAVE ix86_function_specific_save
29748 #undef TARGET_OPTION_RESTORE
29749 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29751 #undef TARGET_OPTION_PRINT
29752 #define TARGET_OPTION_PRINT ix86_function_specific_print
29754 #undef TARGET_OPTION_CAN_INLINE_P
29755 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29757 #undef TARGET_EXPAND_TO_RTL_HOOK
29758 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The single target vtable instance; TARGET_INITIALIZER expands to an
   initializer built from all the hook macros defined above.  */
29760 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
29762 #include "gt-i386.h"