1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declaration: per-statement cost hook used by the vectorizer
   cost model (definition appears later in this file).  */
static int x86_builtin_vectorization_cost (bool);
/* Forward declaration: rewrite a reference to a dllimport'ed symbol into
   a legitimate memory reference (definition appears later in this file).  */
static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-checking limit: -1 means "no limit" unless the target
   configuration headers define one.  NOTE(review): the matching #endif
   was missing in the extracted source; restored here to keep the
   preprocessor conditional balanced.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4.
   NOTE(review): the final ": 4)" alternative was dropped in extraction,
   leaving a dangling line continuation; restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a size cost in bytes on the same
   scale the speed cost tables use.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry: unconditionally fall back to a
   library call for any size.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for code size rather than speed.
   NOTE(review): some positional initializer lines (e.g. the "large insn"
   / MOVE_RATIO / branch-cost fields) and the closing "};" are not visible
   here — they appear to have been dropped in extraction; verify the field
   list against upstream gcc/config/i386/i386.c before compiling.  */
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
149 /* Processor costs (relative to an add) */
/* Cost table for tuning for the original Intel 386.
   NOTE(review): some positional initializer lines (e.g. MOVE_RATIO and
   the branch-cost field) and the closing "};" are not visible here —
   apparently dropped in extraction; verify against upstream i386.c.  */
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1),   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the Intel 486.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  4,                   /* size of l1 cache.  486 has 8kB cache
                          shared for code and data, so 4kB is
                          not really precise.  */
  4,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the Intel Pentium.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  8,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  8,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the Intel PentiumPro / P6 family.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache */
  32,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the AMD Geode.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (2),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  1,                   /* cost for loading QImode using movzbl */
  {1, 1, 1},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {1, 1, 1},           /* cost of storing integer registers */
  1,                   /* cost of reg,reg fld/fst */
  {1, 1, 1},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 6, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  1,                   /* cost of moving MMX register */
  {1, 1},              /* cost of loading MMX registers
                          in SImode and DImode */
  {1, 1},              /* cost of storing MMX registers
                          in SImode and DImode */
  1,                   /* cost of moving SSE register */
  {1, 1, 1},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {1, 1, 1},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  1,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  128,                 /* size of l2 cache.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the AMD K6.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  3,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  6,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  32,                  /* size of l2 cache.  Some models
                          have integrated l2 cache, but
                          optimizing for k6 is not important
                          enough to worry about that.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the AMD Athlon.
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 3, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can do
     nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  5,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  3,                   /* vec_unalign_load_cost.  */
  3,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  2,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for AMD Family 10h (Barcelona).
   NOTE(review): some positional initializer lines and the closing "};"
   are not visible here — apparently dropped in extraction; verify the
   field list against upstream i386.c.  */
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  /* NOTE(review): move-latency table below is a comment fragment whose
     opening line was lost in extraction; re-wrapped as a comment so it
     does not parse as code:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
       MOVD reg64, xmmreg  Double  FADD   3
       MOVD reg32, xmmreg  Double  FADD   3  */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                 /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
/* Cost table for tuning for the Intel Pentium 4.
   NOTE(review): some positional initializer lines, the tail of the memset
   stringop table (the "{-1, libcall}}}," line), and the closing "};" are
   not visible here — apparently dropped in extraction; verify the field
   list against upstream i386.c.  */
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (3),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (4),   /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  12,                  /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  10,                  /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (5),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1091 is increased to perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
/* Cost table the backend currently tunes for; reset during option
   processing (-mtune).  The visible default is the Pentium table.  */
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1189 /* Processor feature/optimization bitmasks.  Each m_* macro is a one-bit
   mask keyed by the PROCESSOR_* enum value, so tuning tables below can
   OR together the set of CPUs a feature applies to.  */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience union: K6 and Geode share many tuning decisions.  */
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* All AMD cores handled by this backend chunk.  */
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1210 /* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1214 /* Feature tests against the various tunings.  Indexed by X86_TUNE_*;
   each entry is a boolean derived from initial_ix86_tune_features for
   the processor selected by -mtune (see the comment on that table).  */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_MOV0 */
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1291 /* X86_TUNE_READ_MODIFY */
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1304 /* X86_TUNE_QIMODE_MATH */
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here in between PPro/Pentium4 based chips that thread 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 shows that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376 /* X86_TUNE_SHIFT1 */
1379 /* X86_TUNE_USE_FFREEP */
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1440 m_AMDFAM10 | m_GENERIC,
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1452 /* Feature tests against the various architecture variations.  Indexed
   by X86_ARCH_*; derived from initial_ix86_arch_features for the
   processor selected by -march (per the comment on that table).  */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which outgoing argument space is pre-allocated in the
   prologue rather than pushed per call.  */
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPUs for which x87 math is always usable for fancy operations.
   NOTE(review): exact semantics of this mask are defined where it is
   consumed, outside this chunk -- confirm there.  */
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation strategy override; no_stringop means none selected,
   so the per-CPU cost tables above decide.  NOTE(review): presumably
   set from a -mstringop-strategy option -- setter not in this chunk.  */
1481 static enum stringop_alg stringop_alg = no_stringop;
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1523 /* The "default" register map used in 32bit mode. */
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1536 static int const x86_64_int_parameter_registers[6] =
1538 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1539 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1542 static int const x86_64_ms_abi_int_parameter_registers[4] =
1544 2 /*RCX*/, 1 /*RDX*/,
1545 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1548 static int const x86_64_int_return_registers[4] =
1550 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1553 /* The "default" register map used in 64bit mode. */
1554 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1556 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1557 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1558 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1559 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1560 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1561 8,9,10,11,12,13,14,15, /* extended integer registers */
1562 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1565 /* Define the register numbers to be used in Dwarf debugging information.
1566 The SVR4 reference port C compiler uses the following register numbers
1567 in its Dwarf output code:
1568 0 for %eax (gcc regno = 0)
1569 1 for %ecx (gcc regno = 2)
1570 2 for %edx (gcc regno = 1)
1571 3 for %ebx (gcc regno = 3)
1572 4 for %esp (gcc regno = 7)
1573 5 for %ebp (gcc regno = 6)
1574 6 for %esi (gcc regno = 4)
1575 7 for %edi (gcc regno = 5)
1576 The following three DWARF register numbers are never generated by
1577 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1578 believes these numbers have these meanings.
1579 8 for %eip (no gcc equivalent)
1580 9 for %eflags (gcc regno = 17)
1581 10 for %trapno (no gcc equivalent)
1582 It is not at all clear how we should number the FP stack registers
1583 for the x86 architecture. If the version of SDB on x86/svr4 were
1584 a bit less brain dead with respect to floating-point then we would
1585 have a precedent to follow with respect to DWARF register numbers
1586 for x86 FP registers, but the SDB on x86/svr4 is so completely
1587 broken with respect to FP registers that it is hardly worth thinking
1588 of it as something to strive for compatibility with.
1589 The version of x86/svr4 SDB I have at the moment does (partially)
1590 seem to believe that DWARF register number 11 is associated with
1591 the x86 register %st(0), but that's about all. Higher DWARF
1592 register numbers don't seem to be associated with anything in
1593 particular, and even for DWARF regno 11, SDB only seems to under-
1594 stand that it should say that a variable lives in %st(0) (when
1595 asked via an `=' command) if we said it was in DWARF regno 11,
1596 but SDB still prints garbage when asked for the value of the
1597 variable in question (via a `/' command).
1598 (Also note that the labels SDB prints for various FP stack regs
1599 when doing an `x' command are all wrong.)
1600 Note that these problems generally don't affect the native SVR4
1601 C compiler because it doesn't allow the use of -O with -g and
1602 because when it is *not* optimizing, it allocates a memory
1603 location for each floating-point variable, and the memory
1604 location is what gets described in the DWARF AT_location
1605 attribute for the variable in question.
1606 Regardless of the severe mental illness of the x86/svr4 SDB, we
1607 do something sensible here and we use the following DWARF
1608 register numbers. Note that these are all stack-top-relative
1610 11 for %st(0) (gcc regno = 8)
1611 12 for %st(1) (gcc regno = 9)
1612 13 for %st(2) (gcc regno = 10)
1613 14 for %st(3) (gcc regno = 11)
1614 15 for %st(4) (gcc regno = 12)
1615 16 for %st(5) (gcc regno = 13)
1616 17 for %st(6) (gcc regno = 14)
1617 18 for %st(7) (gcc regno = 15)
1619 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1621 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1622 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1623 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1624 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1625 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1626 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1627 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1630 /* Test and compare insns in i386.md store the information needed to
1631 generate branch and scc insns here. */
/* Operands of the pending comparison.  */
1633 rtx ix86_compare_op0 = NULL_RTX;
1634 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): presumably a flags result already emitted for the
   comparison, letting the branch expander skip re-emitting it --
   confirm against the i386.md expanders that set this.  */
1635 rtx ix86_compare_emitted = NULL_RTX;
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
/* Assembler dialect (-masm=); AT&T syntax by default.  */
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* Thread-local-storage dialect; GNU by default.  */
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1697 /* Which cpu are we scheduling for. */
1698 enum attr_cpu ix86_schedule;
1700 /* Which cpu are we optimizing for. */
1701 enum processor_type ix86_tune;
1703 /* Which instruction set architecture to use. */
1704 enum processor_type ix86_arch;
1706 /* true if sse prefetch instruction is not NOOP. */
1707 int x86_prefetch_sse;
1709 /* ix86_regparm_string as a number */
1710 static int ix86_regparm;
1712 /* -mstackrealign option */
1713 extern int ix86_force_align_arg_pointer;
/* Attribute name string used to mark functions needing argument-pointer
   realignment.  */
1714 static const char ix86_force_align_arg_pointer_string[]
1715 = "force_align_arg_pointer";
/* Insn-generator hooks chosen once per compilation so callers need not
   test 32- vs 64-bit mode at each emission site.  NOTE(review): the
   code that installs the concrete generators is not in this chunk --
   presumably done during option/override processing; confirm.  */
1717 static rtx (*ix86_gen_leave) (void);
1718 static rtx (*ix86_gen_pop1) (rtx);
1719 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1720 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1721 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1722 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1723 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1724 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1726 /* Preferred alignment for stack boundary in bits. */
1727 unsigned int ix86_preferred_stack_boundary;
1729 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
1731 static unsigned int ix86_user_incoming_stack_boundary;
1733 /* Default alignment for incoming stack boundary in bits. */
1734 static unsigned int ix86_default_incoming_stack_boundary;
1736 /* Alignment for incoming stack boundary in bits. */
1737 unsigned int ix86_incoming_stack_boundary;
1739 /* Values 1-5: see jump.c */
1740 int ix86_branch_cost;
1742 /* Calling abi specific va_list type nodes. */
1743 static GTY(()) tree sysv_va_list_type_node;
1744 static GTY(()) tree ms_va_list_type_node;
1746 /* Variables which are this size or smaller are put in the data/bss
1747 or ldata/lbss sections. */
1749 int ix86_section_threshold = 65536;
1751 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1752 char internal_label_prefix[16];
1753 int internal_label_prefix_len;
1755 /* Fence to use after loop using movnt. */
1758 /* Register class used for passing given 64bit part of the argument.
1759 These represent classes as documented by the PS ABI, with the exception
1760 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1761 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1763 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1764 whenever possible (upper half does contain padding). */
1765 enum x86_64_reg_class
1768 X86_64_INTEGER_CLASS,
1769 X86_64_INTEGERSI_CLASS,
1777 X86_64_COMPLEX_X87_CLASS,
1780 static const char * const x86_64_reg_class_name[] =
1782 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1783 "sseup", "x87", "x87up", "cplx87", "no"
1786 #define MAX_CLASSES 4
1788 /* Table of constants used by fldpi, fldln2, etc.... */
1789 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1790 static bool ext_80387_constants_init = 0;
1793 static struct machine_function * ix86_init_machine_status (void);
1794 static rtx ix86_function_value (const_tree, const_tree, bool);
1795 static int ix86_function_regparm (const_tree, const_tree);
1796 static void ix86_compute_frame_layout (struct ix86_frame *);
1797 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1799 static void ix86_add_new_builtins (int);
1801 enum ix86_function_specific_strings
1803 IX86_FUNCTION_SPECIFIC_ARCH,
1804 IX86_FUNCTION_SPECIFIC_TUNE,
1805 IX86_FUNCTION_SPECIFIC_FPMATH,
1806 IX86_FUNCTION_SPECIFIC_MAX
1809 static char *ix86_target_string (int, int, const char *, const char *,
1810 const char *, bool);
1811 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1812 static void ix86_function_specific_save (struct cl_target_option *);
1813 static void ix86_function_specific_restore (struct cl_target_option *);
1814 static void ix86_function_specific_print (FILE *, int,
1815 struct cl_target_option *);
1816 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1817 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1818 static bool ix86_can_inline_p (tree, tree);
1819 static void ix86_set_current_function (tree);
1822 /* The svr4 ABI for the i386 says that records and unions are returned
1824 #ifndef DEFAULT_PCC_STRUCT_RETURN
1825 #define DEFAULT_PCC_STRUCT_RETURN 1
1828 /* Whether -mtune= or -march= were specified */
1829 static int ix86_tune_defaulted;
1830 static int ix86_arch_specified;
1832 /* Bit flags that specify the ISA we are compiling for. */
1833 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1835 /* A mask of ix86_isa_flags that includes bit X if X
1836 was set or cleared on the command line. */
1837 static int ix86_isa_flags_explicit;
1839 /* Define a set of ISAs which are available when a given ISA is
1840 enabled. MMX and SSE ISAs are handled separately. */
1842 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1843 #define OPTION_MASK_ISA_3DNOW_SET \
1844 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1846 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1847 #define OPTION_MASK_ISA_SSE2_SET \
1848 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1849 #define OPTION_MASK_ISA_SSE3_SET \
1850 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1851 #define OPTION_MASK_ISA_SSSE3_SET \
1852 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1853 #define OPTION_MASK_ISA_SSE4_1_SET \
1854 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1855 #define OPTION_MASK_ISA_SSE4_2_SET \
1856 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1857 #define OPTION_MASK_ISA_AVX_SET \
1858 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1859 #define OPTION_MASK_ISA_FMA_SET \
1860 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1862 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1864 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1866 #define OPTION_MASK_ISA_SSE4A_SET \
1867 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1868 #define OPTION_MASK_ISA_SSE5_SET \
1869 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1871 /* AES and PCLMUL need SSE2 because they use xmm registers */
1872 #define OPTION_MASK_ISA_AES_SET \
1873 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1874 #define OPTION_MASK_ISA_PCLMUL_SET \
1875 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1877 #define OPTION_MASK_ISA_ABM_SET \
1878 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1879 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1880 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1881 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1883 /* Define a set of ISAs which aren't available when a given ISA is
1884 disabled. MMX and SSE ISAs are handled separately. */
1886 #define OPTION_MASK_ISA_MMX_UNSET \
1887 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1888 #define OPTION_MASK_ISA_3DNOW_UNSET \
1889 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1890 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1892 #define OPTION_MASK_ISA_SSE_UNSET \
1893 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1894 #define OPTION_MASK_ISA_SSE2_UNSET \
1895 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1896 #define OPTION_MASK_ISA_SSE3_UNSET \
1897 (OPTION_MASK_ISA_SSE3 \
1898 | OPTION_MASK_ISA_SSSE3_UNSET \
1899 | OPTION_MASK_ISA_SSE4A_UNSET )
1900 #define OPTION_MASK_ISA_SSSE3_UNSET \
1901 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1902 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1903 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1904 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1905 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1906 #define OPTION_MASK_ISA_AVX_UNSET \
1907 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1908 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1910 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1912 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1914 #define OPTION_MASK_ISA_SSE4A_UNSET \
1915 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1916 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1917 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1918 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1919 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1920 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1921 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1922 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1924 /* Vectorization library interface and handlers. */
1925 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1926 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1927 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1929 /* Processor target table, indexed by processor number */
1932 const struct processor_costs *cost; /* Processor costs */
1933 const int align_loop; /* Default alignments. */
1934 const int align_loop_max_skip;
1935 const int align_jump;
1936 const int align_jump_max_skip;
1937 const int align_func;
1940 static const struct ptt processor_target_table[PROCESSOR_max] =
1942 {&i386_cost, 4, 3, 4, 3, 4},
1943 {&i486_cost, 16, 15, 16, 15, 16},
1944 {&pentium_cost, 16, 7, 16, 7, 16},
1945 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1946 {&geode_cost, 0, 0, 0, 0, 0},
1947 {&k6_cost, 32, 7, 32, 7, 32},
1948 {&athlon_cost, 16, 7, 16, 7, 16},
1949 {&pentium4_cost, 0, 0, 0, 0, 0},
1950 {&k8_cost, 16, 7, 16, 7, 16},
1951 {&nocona_cost, 0, 0, 0, 0, 0},
1952 {&core2_cost, 16, 10, 16, 10, 16},
1953 {&generic32_cost, 16, 7, 16, 7, 16},
1954 {&generic64_cost, 16, 10, 16, 10, 16},
1955 {&amdfam10_cost, 32, 24, 32, 7, 32}
1958 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1983 /* Implement TARGET_HANDLE_OPTION. */
1986 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1993 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1994 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1998 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1999 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2006 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2007 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2011 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2012 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2022 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2027 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2028 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2035 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2036 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2040 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2041 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2048 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2049 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2053 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2054 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2061 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2062 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2066 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2067 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2074 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2075 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2079 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2080 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2087 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2088 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2092 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2093 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2100 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2101 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2105 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2106 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2113 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2114 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2118 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2124 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2129 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2136 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2141 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2142 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2149 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2154 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2155 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2162 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2167 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2168 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2175 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2180 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2181 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2188 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2193 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2194 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2201 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2206 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2207 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2214 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2219 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2220 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2227 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2242 /* Return a string the documents the current -m options. The caller is
2243 responsible for freeing the string. */
2246 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2247 const char *fpmath, bool add_nl_p)
2249 struct ix86_target_opts
2251 const char *option; /* option string */
2252 int mask; /* isa mask options */
2255 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2256    preceding options; we match those first. */
2257 static struct ix86_target_opts isa_opts[] =
2259 { "-m64", OPTION_MASK_ISA_64BIT },
2260 { "-msse5", OPTION_MASK_ISA_SSE5 },
2261 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2262 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2263 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2264 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2265 { "-msse3", OPTION_MASK_ISA_SSE3 },
2266 { "-msse2", OPTION_MASK_ISA_SSE2 },
2267 { "-msse", OPTION_MASK_ISA_SSE },
2268 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2269 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2270 { "-mmmx", OPTION_MASK_ISA_MMX },
2271 { "-mabm", OPTION_MASK_ISA_ABM },
2272 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2273 { "-maes", OPTION_MASK_ISA_AES },
2274 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2278 static struct ix86_target_opts flag_opts[] =
2280 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2281 { "-m80387", MASK_80387 },
2282 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2283 { "-malign-double", MASK_ALIGN_DOUBLE },
2284 { "-mcld", MASK_CLD },
2285 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2286 { "-mieee-fp", MASK_IEEE_FP },
2287 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2288 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2289 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2290 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2291 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2292 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2293 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2294 { "-mno-red-zone", MASK_NO_RED_ZONE },
2295 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2296 { "-mrecip", MASK_RECIP },
2297 { "-mrtd", MASK_RTD },
2298 { "-msseregparm", MASK_SSEREGPARM },
2299 { "-mstack-arg-probe", MASK_STACK_PROBE },
2300 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2303 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2304 + sizeof (flag_opts) / sizeof (flag_opts[0])
2308 char target_other[40];
2317 memset (opts, '\0', sizeof (opts));
2319 /* Add -march= option. */
2322 opts[num][0] = "-march=";
2323 opts[num++][1] = arch;
2326 /* Add -mtune= option. */
2329 opts[num][0] = "-mtune=";
2330 opts[num++][1] = tune;
2333 /* Pick out the options in isa options. */
2334 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2336 if ((isa & isa_opts[i].mask) != 0)
2338 opts[num++][0] = isa_opts[i].option;
2339 isa &= ~ isa_opts[i].mask;
2343 if (isa && add_nl_p)
2345 opts[num++][0] = isa_other;
2346 sprintf (isa_other, "(other isa: 0x%x)", isa);
2349 /* Add flag options. */
2350 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2352 if ((flags & flag_opts[i].mask) != 0)
2354 opts[num++][0] = flag_opts[i].option;
2355 flags &= ~ flag_opts[i].mask;
2359   if (flags && add_nl_p)
2361       opts[num++][0] = target_other;
       /* Report the leftover target FLAGS bits here; ISA leftovers were
	  already reported above via isa_other.  Using `isa' here was a
	  copy-paste bug.  */
2362       sprintf (target_other, "(other flags: 0x%x)", flags);
2365 /* Add -fpmath= option. */
2368 opts[num][0] = "-mfpmath=";
2369 opts[num++][1] = fpmath;
2376 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2378 /* Size the string. */
2380 sep_len = (add_nl_p) ? 3 : 1;
2381 for (i = 0; i < num; i++)
2384 for (j = 0; j < 2; j++)
2386 len += strlen (opts[i][j]);
2389 /* Build the string. */
2390 ret = ptr = (char *) xmalloc (len);
2393 for (i = 0; i < num; i++)
2397 for (j = 0; j < 2; j++)
2398 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2405 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2413 for (j = 0; j < 2; j++)
2416 memcpy (ptr, opts[i][j], len2[j]);
2418 line_len += len2[j];
2423 gcc_assert (ret + len >= ptr);
2428 /* Function that is callable from the debugger to print the current
2431 ix86_debug_options (void)
2433 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2434 ix86_arch_string, ix86_tune_string,
2435 ix86_fpmath_string, true);
2439 fprintf (stderr, "%s\n\n", opts);
2443 fprintf (stderr, "<no options>\n\n");
2448 /* Sometimes certain combinations of command options do not make
2449 sense on a particular target machine. You can define a macro
2450 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2451 defined, is executed once just after all the command options have
2454 Don't use this macro to turn on various extra optimizations for
2455 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2458 override_options (bool main_args_p)
2461 unsigned int ix86_arch_mask, ix86_tune_mask;
2466 /* Comes from final.c -- no real reason to change it. */
2467 #define MAX_CODE_ALIGN 16
2475 PTA_PREFETCH_SSE = 1 << 4,
2477 PTA_3DNOW_A = 1 << 6,
2481 PTA_POPCNT = 1 << 10,
2483 PTA_SSE4A = 1 << 12,
2484 PTA_NO_SAHF = 1 << 13,
2485 PTA_SSE4_1 = 1 << 14,
2486 PTA_SSE4_2 = 1 << 15,
2489 PTA_PCLMUL = 1 << 18,
2496 const char *const name; /* processor name or nickname. */
2497 const enum processor_type processor;
2498 const enum attr_cpu schedule;
2499 const unsigned /*enum pta_flags*/ flags;
2501 const processor_alias_table[] =
2503 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2504 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2505 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2506 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2507 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2508 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2509 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2510 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2511 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2512 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2513 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2514 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2515 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2517 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2519 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2520 PTA_MMX | PTA_SSE | PTA_SSE2},
2521 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2522 PTA_MMX |PTA_SSE | PTA_SSE2},
2523 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2524 PTA_MMX | PTA_SSE | PTA_SSE2},
2525 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2526 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2527 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2528 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2529 | PTA_CX16 | PTA_NO_SAHF},
2530 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2531 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2532 | PTA_SSSE3 | PTA_CX16},
2533 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2534 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2535 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2536 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2537 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2538 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2539 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2540 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2541 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2542 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2544 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2546 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2548 {"x86-64", PROCESSOR_K8, CPU_K8,
2549 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2550 {"k8", PROCESSOR_K8, CPU_K8,
2551 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2552 | PTA_SSE2 | PTA_NO_SAHF},
2553 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2554 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2555 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2556 {"opteron", PROCESSOR_K8, CPU_K8,
2557 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2558 | PTA_SSE2 | PTA_NO_SAHF},
2559 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2562 {"athlon64", PROCESSOR_K8, CPU_K8,
2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564 | PTA_SSE2 | PTA_NO_SAHF},
2565 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2568 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570 | PTA_SSE2 | PTA_NO_SAHF},
2571 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2574 {"barcelona", PROCESSOR_AMDFAM10, PROCESSOR_AMDFAM10,
2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2577 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2578 0 /* flags are only used for -march switch. */ },
2579 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2580 PTA_64BIT /* flags are only used for -march switch. */ },
2583 int const pta_size = ARRAY_SIZE (processor_alias_table);
2585 /* Set up prefix/suffix so the error messages refer to either the command
2586 line argument, or the attribute(target). */
2595 prefix = "option(\"";
2600 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2601 SUBTARGET_OVERRIDE_OPTIONS;
2604 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2605 SUBSUBTARGET_OVERRIDE_OPTIONS;
2608 /* -fPIC is the default for x86_64. */
2609 if (TARGET_MACHO && TARGET_64BIT)
2612 /* Set the default values for switches whose default depends on TARGET_64BIT
2613 in case they weren't overwritten by command line options. */
2616 /* Mach-O doesn't support omitting the frame pointer for now. */
2617 if (flag_omit_frame_pointer == 2)
2618 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2619 if (flag_asynchronous_unwind_tables == 2)
2620 flag_asynchronous_unwind_tables = 1;
2621 if (flag_pcc_struct_return == 2)
2622 flag_pcc_struct_return = 0;
2626 if (flag_omit_frame_pointer == 2)
2627 flag_omit_frame_pointer = 0;
2628 if (flag_asynchronous_unwind_tables == 2)
2629 flag_asynchronous_unwind_tables = 0;
2630 if (flag_pcc_struct_return == 2)
2631 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2634 /* Need to check -mtune=generic first. */
2635 if (ix86_tune_string)
2637 if (!strcmp (ix86_tune_string, "generic")
2638 || !strcmp (ix86_tune_string, "i686")
2639 /* As special support for cross compilers we read -mtune=native
2640 as -mtune=generic. With native compilers we won't see the
2641 -mtune=native, as it was changed by the driver. */
2642 || !strcmp (ix86_tune_string, "native"))
2645 ix86_tune_string = "generic64";
2647 ix86_tune_string = "generic32";
2649 /* If this call is for setting the option attribute, allow the
2650 generic32/generic64 that was previously set. */
2651 else if (!main_args_p
2652 && (!strcmp (ix86_tune_string, "generic32")
2653 || !strcmp (ix86_tune_string, "generic64")))
2655 else if (!strncmp (ix86_tune_string, "generic", 7))
2656 error ("bad value (%s) for %stune=%s %s",
2657 ix86_tune_string, prefix, suffix, sw);
2661 if (ix86_arch_string)
2662 ix86_tune_string = ix86_arch_string;
2663 if (!ix86_tune_string)
2665 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2666 ix86_tune_defaulted = 1;
2669 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2670 need to use a sensible tune option. */
2671 if (!strcmp (ix86_tune_string, "generic")
2672 || !strcmp (ix86_tune_string, "x86-64")
2673 || !strcmp (ix86_tune_string, "i686"))
2676 ix86_tune_string = "generic64";
2678 ix86_tune_string = "generic32";
2681 if (ix86_stringop_string)
2683 if (!strcmp (ix86_stringop_string, "rep_byte"))
2684 stringop_alg = rep_prefix_1_byte;
2685 else if (!strcmp (ix86_stringop_string, "libcall"))
2686 stringop_alg = libcall;
2687 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2688 stringop_alg = rep_prefix_4_byte;
2689 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2690 stringop_alg = rep_prefix_8_byte;
2691 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2692 stringop_alg = loop_1_byte;
2693 else if (!strcmp (ix86_stringop_string, "loop"))
2694 stringop_alg = loop;
2695 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2696 stringop_alg = unrolled_loop;
2698 error ("bad value (%s) for %sstringop-strategy=%s %s",
2699 ix86_stringop_string, prefix, suffix, sw);
2701 if (!strcmp (ix86_tune_string, "x86-64"))
2702 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2703 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2704 prefix, suffix, prefix, suffix, prefix, suffix);
2706 if (!ix86_arch_string)
2707 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2709 ix86_arch_specified = 1;
2711 if (!strcmp (ix86_arch_string, "generic"))
2712 error ("generic CPU can be used only for %stune=%s %s",
2713 prefix, suffix, sw);
2714 if (!strncmp (ix86_arch_string, "generic", 7))
2715 error ("bad value (%s) for %sarch=%s %s",
2716 ix86_arch_string, prefix, suffix, sw);
2718 if (ix86_cmodel_string != 0)
2720 if (!strcmp (ix86_cmodel_string, "small"))
2721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2722 else if (!strcmp (ix86_cmodel_string, "medium"))
2723 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2724 else if (!strcmp (ix86_cmodel_string, "large"))
2725 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2727 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2728 else if (!strcmp (ix86_cmodel_string, "32"))
2729 ix86_cmodel = CM_32;
2730 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2731 ix86_cmodel = CM_KERNEL;
2733 error ("bad value (%s) for %scmodel=%s %s",
2734 ix86_cmodel_string, prefix, suffix, sw);
2738 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2739 use of rip-relative addressing. This eliminates fixups that
2740 would otherwise be needed if this object is to be placed in a
2741 DLL, and is essentially just as efficient as direct addressing. */
2742 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2743 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2744 else if (TARGET_64BIT)
2745 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2747 ix86_cmodel = CM_32;
2749 if (ix86_asm_string != 0)
2752 && !strcmp (ix86_asm_string, "intel"))
2753 ix86_asm_dialect = ASM_INTEL;
2754 else if (!strcmp (ix86_asm_string, "att"))
2755 ix86_asm_dialect = ASM_ATT;
2757 error ("bad value (%s) for %sasm=%s %s",
2758 ix86_asm_string, prefix, suffix, sw);
2760 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2761 error ("code model %qs not supported in the %s bit mode",
2762 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2763 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2764 sorry ("%i-bit mode not compiled in",
2765 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2767 for (i = 0; i < pta_size; i++)
2768 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2770 ix86_schedule = processor_alias_table[i].schedule;
2771 ix86_arch = processor_alias_table[i].processor;
2772 /* Default cpu tuning to the architecture. */
2773 ix86_tune = ix86_arch;
2775 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2776 error ("CPU you selected does not support x86-64 "
2779 if (processor_alias_table[i].flags & PTA_MMX
2780 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2781 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2782 if (processor_alias_table[i].flags & PTA_3DNOW
2783 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2784 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2785 if (processor_alias_table[i].flags & PTA_3DNOW_A
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2787 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2788 if (processor_alias_table[i].flags & PTA_SSE
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2790 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2791 if (processor_alias_table[i].flags & PTA_SSE2
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2793 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2794 if (processor_alias_table[i].flags & PTA_SSE3
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2797 if (processor_alias_table[i].flags & PTA_SSSE3
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2800 if (processor_alias_table[i].flags & PTA_SSE4_1
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2803 if (processor_alias_table[i].flags & PTA_SSE4_2
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2806 if (processor_alias_table[i].flags & PTA_AVX
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2808 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2809 if (processor_alias_table[i].flags & PTA_FMA
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2811 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2812 if (processor_alias_table[i].flags & PTA_SSE4A
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2814 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2815 if (processor_alias_table[i].flags & PTA_SSE5
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2817 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2818 if (processor_alias_table[i].flags & PTA_ABM
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2820 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2821 if (processor_alias_table[i].flags & PTA_CX16
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2823 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2824 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2826 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2827 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2829 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2830 if (processor_alias_table[i].flags & PTA_AES
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2832 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2833 if (processor_alias_table[i].flags & PTA_PCLMUL
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2835 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2836 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2837 x86_prefetch_sse = true;
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 ix86_arch_mask = 1u << ix86_arch;
2847 for (i = 0; i < X86_ARCH_LAST; ++i)
2848 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2850 for (i = 0; i < pta_size; i++)
2851 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2853 ix86_schedule = processor_alias_table[i].schedule;
2854 ix86_tune = processor_alias_table[i].processor;
2855 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2857 if (ix86_tune_defaulted)
2859 ix86_tune_string = "x86-64";
2860 for (i = 0; i < pta_size; i++)
2861 if (! strcmp (ix86_tune_string,
2862 processor_alias_table[i].name))
2864 ix86_schedule = processor_alias_table[i].schedule;
2865 ix86_tune = processor_alias_table[i].processor;
2868 error ("CPU you selected does not support x86-64 "
2871 /* Intel CPUs have always interpreted SSE prefetch instructions as
2872 NOPs; so, we can enable SSE prefetch instructions even when
2873 -mtune (rather than -march) points us to a processor that has them.
2874 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2875 higher processors. */
2877 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2878 x86_prefetch_sse = true;
2882 error ("bad value (%s) for %stune=%s %s",
2883 ix86_tune_string, prefix, suffix, sw);
2885 ix86_tune_mask = 1u << ix86_tune;
2886 for (i = 0; i < X86_TUNE_LAST; ++i)
2887 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2890 ix86_cost = &ix86_size_cost;
2892 ix86_cost = processor_target_table[ix86_tune].cost;
2894 /* Arrange to set up i386_stack_locals for all functions. */
2895 init_machine_status = ix86_init_machine_status;
2897 /* Validate -mregparm= value. */
2898 if (ix86_regparm_string)
2901 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2902 i = atoi (ix86_regparm_string);
2903 if (i < 0 || i > REGPARM_MAX)
2904 error ("%sregparm=%d%s is not between 0 and %d",
2905 prefix, i, suffix, REGPARM_MAX);
2910 ix86_regparm = REGPARM_MAX;
2912 /* If the user has provided any of the -malign-* options,
2913 warn and use that value only if -falign-* is not set.
2914 Remove this code in GCC 3.2 or later. */
2915 if (ix86_align_loops_string)
2917 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2918 prefix, suffix, prefix, suffix);
2919 if (align_loops == 0)
2921 i = atoi (ix86_align_loops_string);
2922 if (i < 0 || i > MAX_CODE_ALIGN)
2923 error ("%salign-loops=%d%s is not between 0 and %d",
2924 prefix, i, suffix, MAX_CODE_ALIGN);
2926 align_loops = 1 << i;
2930 if (ix86_align_jumps_string)
2932 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2933 prefix, suffix, prefix, suffix);
2934 if (align_jumps == 0)
2936 i = atoi (ix86_align_jumps_string);
2937 if (i < 0 || i > MAX_CODE_ALIGN)
2938 error ("%salign-loops=%d%s is not between 0 and %d",
2939 prefix, i, suffix, MAX_CODE_ALIGN);
2941 align_jumps = 1 << i;
2945 if (ix86_align_funcs_string)
2947 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2948 prefix, suffix, prefix, suffix);
2949 if (align_functions == 0)
2951 i = atoi (ix86_align_funcs_string);
2952 if (i < 0 || i > MAX_CODE_ALIGN)
2953 error ("%salign-loops=%d%s is not between 0 and %d",
2954 prefix, i, suffix, MAX_CODE_ALIGN);
2956 align_functions = 1 << i;
2960 /* Default align_* from the processor table. */
2961 if (align_loops == 0)
2963 align_loops = processor_target_table[ix86_tune].align_loop;
2964 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2966 if (align_jumps == 0)
2968 align_jumps = processor_target_table[ix86_tune].align_jump;
2969 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2971 if (align_functions == 0)
2973 align_functions = processor_target_table[ix86_tune].align_func;
2976 /* Validate -mbranch-cost= value, or provide default. */
2977 ix86_branch_cost = ix86_cost->branch_cost;
2978 if (ix86_branch_cost_string)
2980 i = atoi (ix86_branch_cost_string);
2982 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2984 ix86_branch_cost = i;
2986 if (ix86_section_threshold_string)
2988 i = atoi (ix86_section_threshold_string);
2990 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2992 ix86_section_threshold = i;
2995 if (ix86_tls_dialect_string)
2997 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2998 ix86_tls_dialect = TLS_DIALECT_GNU;
2999 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3000 ix86_tls_dialect = TLS_DIALECT_GNU2;
3001 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3002 ix86_tls_dialect = TLS_DIALECT_SUN;
3004 error ("bad value (%s) for %stls-dialect=%s %s",
3005 ix86_tls_dialect_string, prefix, suffix, sw);
3008 if (ix87_precision_string)
3010 i = atoi (ix87_precision_string);
3011 if (i != 32 && i != 64 && i != 80)
3012 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3017 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3019 /* Enable by default the SSE and MMX builtins. Do allow the user to
3020 explicitly disable any of these. In particular, disabling SSE and
3021 MMX for kernel code is extremely useful. */
3022 if (!ix86_arch_specified)
3024 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3025 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3028 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3032 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3034 if (!ix86_arch_specified)
3036 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3038 /* i386 ABI does not specify red zone. It still makes sense to use it
3039 when programmer takes care to stack from being destroyed. */
3040 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3041 target_flags |= MASK_NO_RED_ZONE;
3044 /* Keep nonleaf frame pointers. */
3045 if (flag_omit_frame_pointer)
3046 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3047 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3048 flag_omit_frame_pointer = 1;
3050 /* If we're doing fast math, we don't care about comparison order
3051 wrt NaNs. This lets us use a shorter comparison sequence. */
3052 if (flag_finite_math_only)
3053 target_flags &= ~MASK_IEEE_FP;
3055 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3056 since the insns won't need emulation. */
3057 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3058 target_flags &= ~MASK_NO_FANCY_MATH_387;
3060 /* Likewise, if the target doesn't have a 387, or we've specified
3061 software floating point, don't use 387 inline intrinsics. */
3063 target_flags |= MASK_NO_FANCY_MATH_387;
3065 /* Turn on MMX builtins for -msse. */
3068 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3069 x86_prefetch_sse = true;
3072 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3073 if (TARGET_SSE4_2 || TARGET_ABM)
3074 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3076 /* Validate -mpreferred-stack-boundary= value or default it to
3077 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3078 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3079 if (ix86_preferred_stack_boundary_string)
3081 i = atoi (ix86_preferred_stack_boundary_string);
3082 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3083 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3084 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3086 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3089 /* Set the default value for -mstackrealign. */
3090 if (ix86_force_align_arg_pointer == -1)
3091 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3093 /* Validate -mincoming-stack-boundary= value or default it to
3094 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3095 if (ix86_force_align_arg_pointer)
3096 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3098 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3099 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3100 if (ix86_incoming_stack_boundary_string)
3102 i = atoi (ix86_incoming_stack_boundary_string);
3103 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3104 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3105 i, TARGET_64BIT ? 4 : 2);
3108 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3109 ix86_incoming_stack_boundary
3110 = ix86_user_incoming_stack_boundary;
3114 /* Accept -msseregparm only if at least SSE support is enabled. */
3115 if (TARGET_SSEREGPARM
3117 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3119 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3120 if (ix86_fpmath_string != 0)
3122 if (! strcmp (ix86_fpmath_string, "387"))
3123 ix86_fpmath = FPMATH_387;
3124 else if (! strcmp (ix86_fpmath_string, "sse"))
3128 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3129 ix86_fpmath = FPMATH_387;
3132 ix86_fpmath = FPMATH_SSE;
3134 else if (! strcmp (ix86_fpmath_string, "387,sse")
3135 || ! strcmp (ix86_fpmath_string, "387+sse")
3136 || ! strcmp (ix86_fpmath_string, "sse,387")
3137 || ! strcmp (ix86_fpmath_string, "sse+387")
3138 || ! strcmp (ix86_fpmath_string, "both"))
3142 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3143 ix86_fpmath = FPMATH_387;
3145 else if (!TARGET_80387)
3147 warning (0, "387 instruction set disabled, using SSE arithmetics");
3148 ix86_fpmath = FPMATH_SSE;
3151 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3154 error ("bad value (%s) for %sfpmath=%s %s",
3155 ix86_fpmath_string, prefix, suffix, sw);
3158 /* If the i387 is disabled, then do not return values in it. */
3160 target_flags &= ~MASK_FLOAT_RETURNS;
3162 /* Use external vectorized library in vectorizing intrinsics. */
3163 if (ix86_veclibabi_string)
3165 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3166 ix86_veclib_handler = ix86_veclibabi_svml;
3167 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3168 ix86_veclib_handler = ix86_veclibabi_acml;
3170 error ("unknown vectorization library ABI type (%s) for "
3171 "%sveclibabi=%s %s", ix86_veclibabi_string,
3172 prefix, suffix, sw);
3175 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3176 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3178 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3180 /* ??? Unwind info is not correct around the CFG unless either a frame
3181 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3182 unwind info generation to be aware of the CFG and propagating states
3184 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3185 || flag_exceptions || flag_non_call_exceptions)
3186 && flag_omit_frame_pointer
3187 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3189 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3190 warning (0, "unwind tables currently require either a frame pointer "
3191 "or %saccumulate-outgoing-args%s for correctness",
3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3196 /* If stack probes are required, the space used for large function
3197 arguments on the stack must also be probed, so enable
3198 -maccumulate-outgoing-args so this happens in the prologue. */
3199 if (TARGET_STACK_PROBE
3200 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3202 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3203 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3204 "for correctness", prefix, suffix);
3205 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3208 /* For sane SSE instruction set generation we need fcomi instruction.
3209 It is safe to enable all CMOVE instructions. */
3213 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3216 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3217 p = strchr (internal_label_prefix, 'X');
3218 internal_label_prefix_len = p - internal_label_prefix;
3222 /* When scheduling description is not available, disable scheduler pass
3223 so it won't slow down the compilation and make x87 code slower. */
3224 if (!TARGET_SCHEDULE)
3225 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3227 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3228 set_param_value ("simultaneous-prefetches",
3229 ix86_cost->simultaneous_prefetches);
3230 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3231 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3232 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3233 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3234 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3235 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3237 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3238 can be optimized to ap = __builtin_next_arg (0). */
3240 targetm.expand_builtin_va_start = NULL;
3244 ix86_gen_leave = gen_leave_rex64;
3245 ix86_gen_pop1 = gen_popdi1;
3246 ix86_gen_add3 = gen_adddi3;
3247 ix86_gen_sub3 = gen_subdi3;
3248 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3249 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3250 ix86_gen_monitor = gen_sse3_monitor64;
3251 ix86_gen_andsp = gen_anddi3;
3255 ix86_gen_leave = gen_leave;
3256 ix86_gen_pop1 = gen_popsi1;
3257 ix86_gen_add3 = gen_addsi3;
3258 ix86_gen_sub3 = gen_subsi3;
3259 ix86_gen_sub3_carry = gen_subsi3_carry;
3260 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3261 ix86_gen_monitor = gen_sse3_monitor;
3262 ix86_gen_andsp = gen_andsi3;
3266 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3268 target_flags |= MASK_CLD & ~target_flags_explicit;
3271 /* Save the initial options in case the user does function specific options */
3273 target_option_default_node = target_option_current_node
3274 = build_target_option_node ();
3277 /* Save the current options */
3280 ix86_function_specific_save (struct cl_target_option *ptr)
/* Copy the current global i386 option state into *PTR so that it can later
   be reinstated by ix86_function_specific_restore.  The IN_RANGE (0, 255)
   asserts suggest these values are stored in narrow (8-bit) fields of
   struct cl_target_option -- TODO confirm against the structure definition.  */
3282   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3283   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3284   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3285   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3286   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3288   ptr->arch = ix86_arch;
3289   ptr->schedule = ix86_schedule;
3290   ptr->tune = ix86_tune;
3291   ptr->fpmath = ix86_fpmath;
3292   ptr->branch_cost = ix86_branch_cost;
3293   ptr->tune_defaulted = ix86_tune_defaulted;
3294   ptr->arch_specified = ix86_arch_specified;
3295   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3296   ptr->target_flags_explicit = target_flags_explicit;
3299 /* Restore the current options */
3302 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Inverse of ix86_function_specific_save: copy the saved option state in
   *PTR back into the i386 globals.  The derived per-arch / per-tune feature
   arrays are recomputed only when the corresponding processor actually
   changed, to avoid redundant work when this hook is called repeatedly.  */
3304   enum processor_type old_tune = ix86_tune;
3305   enum processor_type old_arch = ix86_arch;
3306   unsigned int ix86_arch_mask, ix86_tune_mask;
3309   ix86_arch = ptr->arch;
3310   ix86_schedule = ptr->schedule;
3311   ix86_tune = ptr->tune;
3312   ix86_fpmath = ptr->fpmath;
3313   ix86_branch_cost = ptr->branch_cost;
3314   ix86_tune_defaulted = ptr->tune_defaulted;
3315   ix86_arch_specified = ptr->arch_specified;
3316   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3317   target_flags_explicit = ptr->target_flags_explicit;
3319   /* Recreate the arch feature tests if the arch changed */
3320   if (old_arch != ix86_arch)
3322       ix86_arch_mask = 1u << ix86_arch;
3323       for (i = 0; i < X86_ARCH_LAST; ++i)
3324 	ix86_arch_features[i]
3325 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3328   /* Recreate the tune optimization tests */
3329   if (old_tune != ix86_tune)
3331       ix86_tune_mask = 1u << ix86_tune;
3332       for (i = 0; i < X86_TUNE_LAST; ++i)
3333 	ix86_tune_features[i]
3334 	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3338 /* Print the current options */
3341 ix86_function_specific_print (FILE *file, int indent,
3342 			      struct cl_target_option *ptr)
/* Dump the saved target options in *PTR to FILE in human-readable form,
   indented by INDENT columns; used for debugging function-specific option
   handling.  The string from ix86_target_string is heap-allocated and is
   freed below after printing.  */
3345     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3346 			  NULL, NULL, NULL, false);
3348   fprintf (file, "%*sarch = %d (%s)\n",
3351 	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
3352 	    ? cpu_names[ptr->arch]
3355   fprintf (file, "%*stune = %d (%s)\n",
3358 	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
3359 	    ? cpu_names[ptr->tune]
3362   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3363 	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3364 	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3365   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3369   fprintf (file, "%*s%s\n", indent, "", target_string);
3370   free (target_string);
3375 /* Inner function to process the attribute((target(...))), take an argument and
3376    set the current options from the argument. If we have a list, recursively go
3380 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructor macros: each records the option string, its
   length (minus the NUL), its kind (ISA switch, string-valued option,
   positive flag, negated flag), the option enumerator, and for flag
   options the target_flags mask to set or clear.  */
3385 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3386 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3387 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3388 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3403     enum ix86_opt_type type;
/* ISA options: these toggle instruction-set subsets via ix86_handle_option,
   exactly as the corresponding -m<isa> command-line switches would.  */
3408     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
3409     IX86_ATTR_ISA ("abm",	OPT_mabm),
3410     IX86_ATTR_ISA ("aes",	OPT_maes),
3411     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
3412     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
3413     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
3414     IX86_ATTR_ISA ("sse",	OPT_msse),
3415     IX86_ATTR_ISA ("sse2",	OPT_msse2),
3416     IX86_ATTR_ISA ("sse3",	OPT_msse3),
3417     IX86_ATTR_ISA ("sse4",	OPT_msse4),
3418     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
3419     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
3420     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
3421     IX86_ATTR_ISA ("sse5",	OPT_msse5),
3422     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
3424     /* string options */
3425     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
3426     IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
3427     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: set (IX86_ATTR_YES) or clear (IX86_ATTR_NO) a bit in
   target_flags; "no-" on the attribute string inverts the sense.  */
3430     IX86_ATTR_YES ("cld",
3434     IX86_ATTR_NO ("fancy-math-387",
3435 		  OPT_mfancy_math_387,
3436 		  MASK_NO_FANCY_MATH_387),
3438     IX86_ATTR_NO ("fused-madd",
3440 		  MASK_NO_FUSED_MADD),
3442     IX86_ATTR_YES ("ieee-fp",
3446     IX86_ATTR_YES ("inline-all-stringops",
3447 		   OPT_minline_all_stringops,
3448 		   MASK_INLINE_ALL_STRINGOPS),
3450     IX86_ATTR_YES ("inline-stringops-dynamically",
3451 		   OPT_minline_stringops_dynamically,
3452 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
3454     IX86_ATTR_NO ("align-stringops",
3455 		  OPT_mno_align_stringops,
3456 		  MASK_NO_ALIGN_STRINGOPS),
3458     IX86_ATTR_YES ("recip",
3464   /* If this is a list, recurse to get the options.  */
3465   if (TREE_CODE (args) == TREE_LIST)
3469       for (; args; args = TREE_CHAIN (args))
3470 	if (TREE_VALUE (args)
3471 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3477   else if (TREE_CODE (args) != STRING_CST)
3480   /* Handle multiple arguments separated by commas.  */
3481   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3483   while (next_optstr && *next_optstr != '\0')
3485       char *p = next_optstr;
3487       char *comma = strchr (next_optstr, ',');
3488       const char *opt_string;
3489       size_t len, opt_len;
3494       enum ix86_opt_type type = ix86_opt_unknown;
/* A comma splits off the current option; advance next_optstr past it so
   the loop consumes one comma-separated token per iteration.  */
3500 	  len = comma - next_optstr;
3501 	  next_optstr = comma + 1;
3509       /* Recognize no-xxx.  */
3510       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3519       /* Find the option.  */
3522       for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3524 	  type = attrs[i].type;
3525 	  opt_len = attrs[i].len;
/* Cheap first-character check, then exact-length match for non-string
   options or prefix match (so a value can follow "xxx=") for string ones.  */
3526 	  if (ch == attrs[i].string[0]
3527 	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3528 	      && memcmp (p, attrs[i].string, opt_len) == 0)
3531 	      mask = attrs[i].mask;
3532 	      opt_string = attrs[i].string;
3537       /* Process the option.  */
3540 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
3544       else if (type == ix86_opt_isa)
3545 	ix86_handle_option (opt, p, opt_set_p);
3547       else if (type == ix86_opt_yes || type == ix86_opt_no)
3549 	  if (type == ix86_opt_no)
3550 	    opt_set_p = !opt_set_p;
3553 	    target_flags |= mask;
3555 	    target_flags &= ~mask;
3558       else if (type == ix86_opt_str)
3562 	      error ("option(\"%s\") was already specified", opt_string);
/* Stash the string value (text after "xxx=") for the caller; the caller
   owns and eventually frees these xstrdup'ed strings.  */
3566 	    p_strings[opt] = xstrdup (p + opt_len);
3576 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3579 ix86_valid_target_attribute_tree (tree args)
/* Parse ARGS (the attribute((target(...))) argument), temporarily apply the
   resulting options via override_options, capture them in a target-option
   node, then restore the original global option strings.  */
3581   const char *orig_arch_string = ix86_arch_string;
3582   const char *orig_tune_string = ix86_tune_string;
3583   const char *orig_fpmath_string = ix86_fpmath_string;
3584   int orig_tune_defaulted = ix86_tune_defaulted;
3585   int orig_arch_specified = ix86_arch_specified;
3586   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3589   struct cl_target_option *def
3590     = TREE_TARGET_OPTION (target_option_default_node);
3592   /* Process each of the options on the chain.  */
3593   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3596   /* If the changed options are different from the default, rerun override_options,
3597      and then save the options away.  The string options are attribute options,
3598      and will be undone when we copy the save structure.  */
3599   if (ix86_isa_flags != def->ix86_isa_flags
3600       || target_flags != def->target_flags
3601       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3602       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3603       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3605       /* If we are using the default tune= or arch=, undo the string assigned,
3606 	 and use the default.  */
3607       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3608 	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3609       else if (!orig_arch_specified)
3610 	ix86_arch_string = NULL;
3612       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3613 	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3614       else if (orig_tune_defaulted)
3615 	ix86_tune_string = NULL;
3617       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3618       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3619 	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3620       else if (!TARGET_64BIT && TARGET_SSE)
3621 	ix86_fpmath_string = "sse,387";
3623       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3624       override_options (false);
3626       /* Add any builtin functions with the new isa if any.  */
3627       ix86_add_new_builtins (ix86_isa_flags);
3629       /* Save the current options unless we are validating options for
3631       t = build_target_option_node ();
/* Restore the option strings that were overridden above so later
   command-line-driven processing sees the originals again.  */
3633       ix86_arch_string = orig_arch_string;
3634       ix86_tune_string = orig_tune_string;
3635       ix86_fpmath_string = orig_fpmath_string;
3637       /* Free up memory allocated to hold the strings */
3638       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3639 	if (option_strings[i])
3640 	  free (option_strings[i]);
3646 /* Hook to validate attribute((target("string"))).  */
3649 ix86_valid_target_attribute_p (tree fndecl,
3650 			       tree ARG_UNUSED (name),
3652 			       int ARG_UNUSED (flags))
/* Validate the target attribute on FNDECL and, when valid, record the
   resulting target-option (and possibly optimization) nodes on the decl.
   The current global option state is saved up front and restored before
   returning, so this hook has no lasting effect on the globals.  */
3654   struct cl_target_option cur_target;
3656   tree old_optimize = build_optimization_node ();
3657   tree new_target, new_optimize;
3658   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3660   /* If the function changed the optimization levels as well as setting target
3661      options, start with the optimizations specified.  */
3662   if (func_optimize && func_optimize != old_optimize)
3663     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3665   /* The target attributes may also change some optimization flags, so update
3666      the optimization options if necessary.  */
3667   cl_target_option_save (&cur_target);
3668   new_target = ix86_valid_target_attribute_tree (args);
3669   new_optimize = build_optimization_node ();
3676       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3678       if (old_optimize != new_optimize)
3679 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made while building the nodes.  */
3682   cl_target_option_restore (&cur_target);
3684   if (old_optimize != new_optimize)
3685     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3691 /* Hook to determine if one function can safely inline another.  */
3694 ix86_can_inline_p (tree caller, tree callee)
/* Compare the function-specific target options of CALLER and CALLEE to
   decide whether inlining is safe: the callee must not require any ISA,
   flag, or tuning setting the caller does not share.  */
3697   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3698   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3700   /* If callee has no option attributes, then it is ok to inline.  */
3704   /* If caller has no option attributes, but callee does then it is not ok to
3706   else if (!caller_tree)
3711       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3712       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3714       /* Callee's isa options should be a subset of the caller's, i.e. a SSE5
3715 	 function can inline a SSE2 function but a SSE2 function can't inline
3716 	 a SSE5 function.  */
3717       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3718 	  != callee_opts->ix86_isa_flags)
3721       /* See if we have the same non-isa options.  */
3722       else if (caller_opts->target_flags != callee_opts->target_flags)
3725       /* See if arch, tune, etc. are the same.  */
3726       else if (caller_opts->arch != callee_opts->arch)
3729       else if (caller_opts->tune != callee_opts->tune)
3732       else if (caller_opts->fpmath != callee_opts->fpmath)
3735       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3746 /* Remember the last target of ix86_set_current_function.  */
3747 static GTY(()) tree ix86_previous_fndecl;
3749 /* Establish appropriate back-end context for processing the function
3750    FNDECL.  The argument might be NULL to indicate processing at top
3751    level, outside of any function scope.  */
3753 ix86_set_current_function (tree fndecl)
3755   /* Only change the context if the function changes.  This hook is called
3756      several times in the course of compiling a function, and we don't want to
3757      slow things down too much or call target_reinit when it isn't safe.  */
3758   if (fndecl && fndecl != ix86_previous_fndecl)
/* Fetch the previous and new per-function target-option nodes; only
   reconfigure the backend when they actually differ.  */
3760       tree old_tree = (ix86_previous_fndecl
3761 		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3764       tree new_tree = (fndecl
3765 		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3768       ix86_previous_fndecl = fndecl;
3769       if (old_tree == new_tree)
3774 	  cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* New function has no specific options: fall back to the options that
   were current before any function-specific override.  */
3780 	  struct cl_target_option *def
3781 	    = TREE_TARGET_OPTION (target_option_current_node);
3783 	  cl_target_option_restore (def);
3790 /* Return true if this goes in large data/bss.  */
3793 ix86_in_large_data_p (tree exp)
/* Only the medium code models split data into "large" sections; for all
   other code models nothing is considered large data.  */
3795   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3798   /* Functions are never large data.  */
3799   if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute decides directly: only the ".ldata" and
   ".lbss" sections count as large data.  */
3802   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3804       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3805       if (strcmp (section, ".ldata") == 0
3806 	  || strcmp (section, ".lbss") == 0)
3812       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3814       /* If this is an incomplete type with size 0, then we can't put it
3815 	 in data because it might be too big when completed.  */
3816       if (!size || size > ix86_section_threshold)
3823 /* Switch to the appropriate section for output of DECL.
3824    DECL is either a `VAR_DECL' node or a constant of some sort.
3825    RELOC indicates whether forming the initial value of DECL requires
3826    link-time relocations.  */
3828 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3832 x86_64_elf_select_section (tree decl, int reloc,
3833 			   unsigned HOST_WIDE_INT align)
/* In the medium code models, large data objects are routed into the
   ".l*" family of sections; everything else falls through to the default
   ELF section selection at the bottom.  */
3835   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3836       && ix86_in_large_data_p (decl))
3838       const char *sname = NULL;
3839       unsigned int flags = SECTION_WRITE;
3840       switch (categorize_decl_for_section (decl, reloc))
/* Map each writable-data category to its large-model section name.  */
3845 	case SECCAT_DATA_REL:
3846 	  sname = ".ldata.rel";
3848 	case SECCAT_DATA_REL_LOCAL:
3849 	  sname = ".ldata.rel.local";
3851 	case SECCAT_DATA_REL_RO:
3852 	  sname = ".ldata.rel.ro";
3854 	case SECCAT_DATA_REL_RO_LOCAL:
3855 	  sname = ".ldata.rel.ro.local";
3859 	  flags |= SECTION_BSS;
3862 	case SECCAT_RODATA_MERGE_STR:
3863 	case SECCAT_RODATA_MERGE_STR_INIT:
3864 	case SECCAT_RODATA_MERGE_CONST:
3868 	case SECCAT_SRODATA:
3875 	  /* We don't split these for medium model.  Place them into
3876 	     default sections and hope for best.  */
3878 	case SECCAT_EMUTLS_VAR:
3879 	case SECCAT_EMUTLS_TMPL:
3884 	  /* We might get called with string constants, but get_named_section
3885 	     doesn't like them as they are not DECLs.  Also, we need to set
3886 	     flags in that case.  */
3888 	    return get_section (sname, flags, NULL);
3889 	  return get_named_section (decl, sname, reloc);
3892   return default_elf_select_section (decl, reloc, align);
3895 /* Build up a unique section name, expressed as a
3896    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3897    RELOC indicates whether the initial value of EXP requires
3898    link-time relocations.  */
3900 static void ATTRIBUTE_UNUSED
3901 x86_64_elf_unique_section (tree decl, int reloc)
/* Medium-code-model counterpart of default_unique_section: large data
   objects get a ".l*" section-name prefix (shortened to ".ld"/".lb"/".lr"
   for one-only sections); everything else uses the default machinery.  */
3903   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3904       && ix86_in_large_data_p (decl))
3906       const char *prefix = NULL;
3907       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
3908       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3910       switch (categorize_decl_for_section (decl, reloc))
3913 	case SECCAT_DATA_REL:
3914 	case SECCAT_DATA_REL_LOCAL:
3915 	case SECCAT_DATA_REL_RO:
3916 	case SECCAT_DATA_REL_RO_LOCAL:
3917 	  prefix = one_only ? ".ld" : ".ldata";
3920 	  prefix = one_only ? ".lb" : ".lbss";
3923 	case SECCAT_RODATA_MERGE_STR:
3924 	case SECCAT_RODATA_MERGE_STR_INIT:
3925 	case SECCAT_RODATA_MERGE_CONST:
3926 	  prefix = one_only ? ".lr" : ".lrodata";
3928 	case SECCAT_SRODATA:
3935 	  /* We don't split these for medium model.  Place them into
3936 	     default sections and hope for best.  */
3938 	case SECCAT_EMUTLS_VAR:
3939 	  prefix = targetm.emutls.var_section;
3941 	case SECCAT_EMUTLS_TMPL:
3942 	  prefix = targetm.emutls.tmpl_section;
3947 	  const char *name, *linkonce;
3950 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3951 	  name = targetm.strip_name_encoding (name);
3953 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
3954 	     prefix to the section name.  */
3955 	  linkonce = one_only ? ".gnu.linkonce" : "";
/* Final section name is "<linkonce><prefix>.<decl-name>".  */
3957 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3959 	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3963   default_unique_section (decl, reloc);
3966 #ifdef COMMON_ASM_OP
3967 /* This says how to output assembler code to declare an
3968    uninitialized external linkage data object.
3970    For medium model x86-64 we need to use .largecomm opcode for
3973 x86_elf_aligned_common (FILE *file,
3974 			const char *name, unsigned HOST_WIDE_INT size,
/* Emit either ".largecomm" (medium code model, object larger than the
   -mlarge-data-threshold) or the regular COMMON_ASM_OP directive, followed
   by "name,size,alignment" with the alignment converted to bytes.  */
3977   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3978       && size > (unsigned int)ix86_section_threshold)
3979     fprintf (file, ".largecomm\t");
3981     fprintf (file, "%s", COMMON_ASM_OP);
3982   assemble_name (file, name);
3983   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3984 	   size, align / BITS_PER_UNIT);
3988 /* Utility function for targets to use in implementing
3989    ASM_OUTPUT_ALIGNED_BSS.  */
3992 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3993 			const char *name, unsigned HOST_WIDE_INT size,
/* Switch to ".lbss" for large-model objects over the section threshold,
   otherwise to the regular bss section, then emit alignment, the object's
   label (or full declaration when the target supports it), and SIZE bytes
   of space.  NOTE(review): DECL is marked ATTRIBUTE_UNUSED yet is used
   below -- the attribute is presumably for configurations where the
   ASM_DECLARE_OBJECT_NAME branch is compiled out; confirm.  */
3996   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3997       && size > (unsigned int)ix86_section_threshold)
3998     switch_to_section (get_named_section (decl, ".lbss", 0));
4000     switch_to_section (bss_section);
4001   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4002 #ifdef ASM_DECLARE_OBJECT_NAME
4003   last_assemble_variable_decl = decl;
4004   ASM_DECLARE_OBJECT_NAME (file, name, decl);
4006   /* Standard thing is just output label for the object.  */
4007   ASM_OUTPUT_LABEL (file, name);
4008 #endif /* ASM_DECLARE_OBJECT_NAME */
4009   ASM_OUTPUT_SKIP (file, size ? size : 1);
4013 optimization_options (int level, int size ATTRIBUTE_UNUSED)
/* Set i386-specific defaults for the given optimization LEVEL before the
   command line is fully processed; override_options finalizes anything
   left at its sentinel value here.  */
4015   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
4016      make the problem with not enough registers even worse.  */
4017 #ifdef INSN_SCHEDULING
4019     flag_schedule_insns = 0;
4023   /* The Darwin libraries never set errno, so we might as well
4024      avoid calling them when that's the only reason we would.  */
4025   flag_errno_math = 0;
4027   /* The default values of these switches depend on the TARGET_64BIT
4028      that is not known at this moment.  Mark these values with 2 and
4029      let the user override these.  In case there is no command line option
4030      specifying them, we will set the defaults in override_options.  */
4032     flag_omit_frame_pointer = 2;
4033   flag_pcc_struct_return = 2;
4034   flag_asynchronous_unwind_tables = 2;
4035   flag_vect_cost_model = 1;
4036 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4037   SUBTARGET_OPTIMIZATION_OPTIONS;
4041 /* Decide whether we can make a sibling call to a function. DECL is the
4042 declaration of the function being targeted by the call and EXP is the
4043 CALL_EXPR representing the call. */
4046 ix86_function_ok_for_sibcall (tree decl, tree exp)
4051 /* If we are generating position-independent code, we cannot sibcall
4052 optimize any indirect call, or a direct call to a global function,
4053 as the PLT requires %ebx be live. */
4054 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4061 func = TREE_TYPE (CALL_EXPR_FN (exp));
4062 if (POINTER_TYPE_P (func))
4063 func = TREE_TYPE (func);
4066 /* Check that the return value locations are the same. Like
4067 if we are returning floats on the 80387 register stack, we cannot
4068 make a sibcall from a function that doesn't return a float to a
4069 function that does or, conversely, from a function that does return
4070 a float to a function that doesn't; the necessary stack adjustment
4071 would not be executed. This is also the place we notice
4072 differences in the return value ABI. Note that it is ok for one
4073 of the functions to have void return type as long as the return
4074 value of the other is passed in a register. */
4075 a = ix86_function_value (TREE_TYPE (exp), func, false);
4076 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4078 if (STACK_REG_P (a) || STACK_REG_P (b))
4080 if (!rtx_equal_p (a, b))
4083 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4085 else if (!rtx_equal_p (a, b))
4088 /* If this call is indirect, we'll need to be able to use a call-clobbered
4089 register for the address of the target function. Make sure that all
4090 such registers are not used for passing parameters. */
4091 if (!decl && !TARGET_64BIT)
4095 /* We're looking at the CALL_EXPR, we need the type of the function. */
4096 type = CALL_EXPR_FN (exp); /* pointer expression */
4097 type = TREE_TYPE (type); /* pointer type */
4098 type = TREE_TYPE (type); /* function type */
4100 if (ix86_function_regparm (type, NULL) >= 3)
4102 /* ??? Need to count the actual number of registers to be used,
4103 not the possible number of registers. Fix later. */
4108 /* Dllimport'd functions are also called indirectly. */
4109 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4111 && decl && DECL_DLLIMPORT_P (decl)
4112 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4115 /* Otherwise okay. That also includes certain types of indirect calls. */
4119 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4120 calling convention attributes;
4121 arguments as in struct attribute_spec.handler. */
4124 ix86_handle_cconv_attribute (tree *node, tree name,
4126 int flags ATTRIBUTE_UNUSED,
4129 if (TREE_CODE (*node) != FUNCTION_TYPE
4130 && TREE_CODE (*node) != METHOD_TYPE
4131 && TREE_CODE (*node) != FIELD_DECL
4132 && TREE_CODE (*node) != TYPE_DECL)
4134 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4135 IDENTIFIER_POINTER (name));
4136 *no_add_attrs = true;
4140 /* Can combine regparm with all attributes but fastcall. */
4141 if (is_attribute_p ("regparm", name))
4145 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4147 error ("fastcall and regparm attributes are not compatible");
4150 cst = TREE_VALUE (args);
4151 if (TREE_CODE (cst) != INTEGER_CST)
4153 warning (OPT_Wattributes,
4154 "%qs attribute requires an integer constant argument",
4155 IDENTIFIER_POINTER (name));
4156 *no_add_attrs = true;
4158 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4160 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4161 IDENTIFIER_POINTER (name), REGPARM_MAX);
4162 *no_add_attrs = true;
4170 /* Do not warn when emulating the MS ABI. */
4171 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4172 warning (OPT_Wattributes, "%qs attribute ignored",
4173 IDENTIFIER_POINTER (name));
4174 *no_add_attrs = true;
4178 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4179 if (is_attribute_p ("fastcall", name))
4181 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4183 error ("fastcall and cdecl attributes are not compatible");
4185 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4187 error ("fastcall and stdcall attributes are not compatible");
4189 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4191 error ("fastcall and regparm attributes are not compatible");
4195 /* Can combine stdcall with fastcall (redundant), regparm and
4197 else if (is_attribute_p ("stdcall", name))
4199 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4201 error ("stdcall and cdecl attributes are not compatible");
4203 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4205 error ("stdcall and fastcall attributes are not compatible");
4209 /* Can combine cdecl with regparm and sseregparm. */
4210 else if (is_attribute_p ("cdecl", name))
4212 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4214 error ("stdcall and cdecl attributes are not compatible");
4216 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4218 error ("fastcall and cdecl attributes are not compatible");
4222 /* Can combine sseregparm with all attributes. */
4227 /* Return 0 if the attributes for two types are incompatible, 1 if they
4228 are compatible, and 2 if they are nearly compatible (which causes a
4229 warning to be generated). */
4232 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4234 /* Check for mismatch of non-default calling convention. */
4235 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4237 if (TREE_CODE (type1) != FUNCTION_TYPE
4238 && TREE_CODE (type1) != METHOD_TYPE)
4241 /* Check for mismatched fastcall/regparm types. */
4242 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4243 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4244 || (ix86_function_regparm (type1, NULL)
4245 != ix86_function_regparm (type2, NULL)))
4248 /* Check for mismatched sseregparm types. */
4249 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4250 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4253 /* Check for mismatched return types (cdecl vs stdcall). */
4254 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4255 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4261 /* Return the regparm value for a function with the indicated TYPE and DECL.
4262 DECL may be NULL when calling function indirectly
4263 or considering a libcall. */
4266 ix86_function_regparm (const_tree type, const_tree decl)
4269 int regparm = ix86_regparm;
4271 static bool error_issued;
4275 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4277 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4280 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4284 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4286 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4288 /* We can't use regparm(3) for nested functions because
4289 these pass static chain pointer in %ecx register. */
4290 if (!error_issued && regparm == 3
4291 && decl_function_context (decl)
4292 && !DECL_NO_STATIC_CHAIN (decl))
4294 error ("nested functions are limited to 2 register parameters");
4295 error_issued = true;
4303 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4306 /* Use register calling convention for local functions when possible. */
4307 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4310 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4311 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4314 int local_regparm, globals = 0, regno;
4317 /* Make sure no regparm register is taken by a
4318 fixed register variable. */
4319 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4320 if (fixed_regs[local_regparm])
4323 /* We can't use regparm(3) for nested functions as these use
4324 static chain pointer in third argument. */
4325 if (local_regparm == 3
4326 && decl_function_context (decl)
4327 && !DECL_NO_STATIC_CHAIN (decl))
4330 /* If the function realigns its stackpointer, the prologue will
4331 clobber %ecx. If we've already generated code for the callee,
4332 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4333 scanning the attributes for the self-realigning property. */
4334 f = DECL_STRUCT_FUNCTION (decl);
4335 /* Since current internal arg pointer won't conflict with
4336 parameter passing regs, so no need to change stack
4337 realignment and adjust regparm number.
4339 Each fixed register usage increases register pressure,
4340 so less registers should be used for argument passing.
4341 This functionality can be overridden by an explicit
4343 for (regno = 0; regno <= DI_REG; regno++)
4344 if (fixed_regs[regno])
4348 = globals < local_regparm ? local_regparm - globals : 0;
4350 if (local_regparm > regparm)
4351 regparm = local_regparm;
4358 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4359 DFmode (2) arguments in SSE registers for a function with the
4360 indicated TYPE and DECL. DECL may be NULL when calling function
4361 indirectly or considering a libcall. Otherwise return 0. */
4364 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4366 gcc_assert (!TARGET_64BIT);
4368 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4369 by the sseregparm attribute. */
4370 if (TARGET_SSEREGPARM
4371 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4378 error ("Calling %qD with attribute sseregparm without "
4379 "SSE/SSE2 enabled", decl);
4381 error ("Calling %qT with attribute sseregparm without "
4382 "SSE/SSE2 enabled", type);
4390 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4391 (and DFmode for SSE2) arguments in SSE registers. */
4392 if (decl && TARGET_SSE_MATH && !profile_flag)
4394 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4395 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4397 return TARGET_SSE2 ? 2 : 1;
4403 /* Return true if EAX is live at the start of the function. Used by
4404 ix86_expand_prologue to determine if we need special help before
4405 calling allocate_stack_worker. */
4408 ix86_eax_live_at_start_p (void)
4410 /* Cheat. Don't bother working forward from ix86_function_regparm
4411 to the function type to whether an actual argument is located in
4412 eax. Instead just look at cfg info, which is still close enough
4413 to correct at this point. This gives false positives for broken
4414 functions that might use uninitialized data that happens to be
4415 allocated in eax, but who cares? */
4416 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4419 /* Value is the number of bytes of arguments automatically
4420 popped when returning from a subroutine call.
4421 FUNDECL is the declaration node of the function (as a tree),
4422 FUNTYPE is the data type of the function (as a tree),
4423 or for a library call it is an identifier node for the subroutine name.
4424 SIZE is the number of bytes of arguments passed on the stack.
4426 On the 80386, the RTD insn may be used to pop them if the number
4427 of args is fixed, but if the number is variable then the caller
4428 must pop them all. RTD can't be used for library calls now
4429 because the library is compiled with the Unix compiler.
4430 Use of RTD is a selectable option, since it is incompatible with
4431 standard Unix calling sequences. If the option is not selected,
4432 the caller must always pop the args.
4434 The attribute stdcall is equivalent to RTD on a per module basis. */
4437 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4441 /* None of the 64-bit ABIs pop arguments. */
4445 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4447 /* Cdecl functions override -mrtd, and never pop the stack. */
4448 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4450 /* Stdcall and fastcall functions will pop the stack if not
4452 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4453 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4456 if (rtd && ! stdarg_p (funtype))
4460 /* Lose any fake structure return argument if it is passed on the stack. */
4461 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4462 && !KEEP_AGGREGATE_RETURN_POINTER)
4464 int nregs = ix86_function_regparm (funtype, fundecl);
4466 return GET_MODE_SIZE (Pmode);
4472 /* Argument support functions. */
4474 /* Return true when register may be used to pass function parameters. */
4476 ix86_function_arg_regno_p (int regno)
4479 const int *parm_regs;
4484 return (regno < REGPARM_MAX
4485 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4487 return (regno < REGPARM_MAX
4488 || (TARGET_MMX && MMX_REGNO_P (regno)
4489 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4490 || (TARGET_SSE && SSE_REGNO_P (regno)
4491 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4496 if (SSE_REGNO_P (regno) && TARGET_SSE)
4501 if (TARGET_SSE && SSE_REGNO_P (regno)
4502 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4506 /* TODO: The function should depend on current function ABI but
4507 builtins.c would need updating then. Therefore we use the
4510 /* RAX is used as hidden argument to va_arg functions. */
4511 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4514 if (DEFAULT_ABI == MS_ABI)
4515 parm_regs = x86_64_ms_abi_int_parameter_registers;
4517 parm_regs = x86_64_int_parameter_registers;
4518 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4519 : X86_64_REGPARM_MAX); i++)
4520 if (regno == parm_regs[i])
4525 /* Return if we do not know how to pass TYPE solely in registers. */
4528 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4530 if (must_pass_in_stack_var_size_or_pad (mode, type))
4533 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4534 The layout_type routine is crafty and tries to trick us into passing
4535 currently unsupported vector types on the stack by using TImode. */
4536 return (!TARGET_64BIT && mode == TImode
4537 && type && TREE_CODE (type) != VECTOR_TYPE);
4540 /* It returns the size, in bytes, of the area reserved for arguments passed
4541 in registers for the function represented by fndecl, depending on the used
4544 ix86_reg_parm_stack_space (const_tree fndecl)
4547 /* For libcalls it is possible that there is no fndecl at hand.
4548 Therefore assume for this case the default abi of the target. */
4550 call_abi = DEFAULT_ABI;
4552 call_abi = ix86_function_abi (fndecl);
4558 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4561 ix86_function_type_abi (const_tree fntype)
4563 if (TARGET_64BIT && fntype != NULL)
4566 if (DEFAULT_ABI == SYSV_ABI)
4567 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4569 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4577 ix86_function_abi (const_tree fndecl)
4581 return ix86_function_type_abi (TREE_TYPE (fndecl));
4584 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4587 ix86_cfun_abi (void)
4589 if (! cfun || ! TARGET_64BIT)
4591 return cfun->machine->call_abi;
4595 extern void init_regs (void);
4597 /* Implementation of call abi switching target hook. Specific to FNDECL
4598 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4600 To prevent redundant calls of the costly function init_regs (), it checks not to
4601 reset register usage for default abi. */
4603 ix86_call_abi_override (const_tree fndecl)
4605 if (fndecl == NULL_TREE)
4606 cfun->machine->call_abi = DEFAULT_ABI;
4608 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4609 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4611 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4613 call_used_regs[4 /*RSI*/] = 0;
4614 call_used_regs[5 /*RDI*/] = 0;
4618 else if (TARGET_64BIT)
4620 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4622 call_used_regs[4 /*RSI*/] = 1;
4623 call_used_regs[5 /*RDI*/] = 1;
4629 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4630 for a call to a function whose data type is FNTYPE.
4631 For a library call, FNTYPE is 0. */
4634 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4635 tree fntype, /* tree ptr for function decl */
4636 rtx libname, /* SYMBOL_REF of library name or 0 */
4639 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4640 memset (cum, 0, sizeof (*cum));
4642 cum->call_abi = ix86_function_type_abi (fntype);
4643 /* Set up the number of registers to use for passing arguments. */
4644 cum->nregs = ix86_regparm;
4647 if (cum->call_abi != DEFAULT_ABI)
4648 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4653 cum->sse_nregs = SSE_REGPARM_MAX;
4656 if (cum->call_abi != DEFAULT_ABI)
4657 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4658 : X64_SSE_REGPARM_MAX;
4662 cum->mmx_nregs = MMX_REGPARM_MAX;
4663 cum->warn_avx = true;
4664 cum->warn_sse = true;
4665 cum->warn_mmx = true;
4667 /* Because type might mismatch in between caller and callee, we need to
4668 use actual type of function for local calls.
4669 FIXME: cgraph_analyze can be told to actually record if function uses
4670 va_start so for local functions maybe_vaarg can be made aggressive
4672 FIXME: once the type system is fixed, we won't need this code anymore. */
4674 fntype = TREE_TYPE (fndecl);
4675 cum->maybe_vaarg = (fntype
4676 ? (!prototype_p (fntype) || stdarg_p (fntype))
4681 /* If there are variable arguments, then we won't pass anything
4682 in registers in 32-bit mode. */
4683 if (stdarg_p (fntype))
4694 /* Use ecx and edx registers if function has fastcall attribute,
4695 else look for regparm information. */
4698 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4704 cum->nregs = ix86_function_regparm (fntype, fndecl);
4707 /* Set up the number of SSE registers used for passing SFmode
4708 and DFmode arguments. Warn for mismatching ABI. */
4709 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4713 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4714 But in the case of vector types, it is some vector mode.
4716 When we have only some of our vector isa extensions enabled, then there
4717 are some modes for which vector_mode_supported_p is false. For these
4718 modes, the generic vector support in gcc will choose some non-vector mode
4719 in order to implement the type. By computing the natural mode, we'll
4720 select the proper ABI location for the operand and not depend on whatever
4721 the middle-end decides to do with these vector types. */
4723 static enum machine_mode
4724 type_natural_mode (const_tree type)
4726 enum machine_mode mode = TYPE_MODE (type);
4728 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4730 HOST_WIDE_INT size = int_size_in_bytes (type);
4731 if ((size == 8 || size == 16)
4732 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4733 && TYPE_VECTOR_SUBPARTS (type) > 1)
4735 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4737 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4738 mode = MIN_MODE_VECTOR_FLOAT;
4740 mode = MIN_MODE_VECTOR_INT;
4742 /* Get the mode which has this inner mode and number of units. */
4743 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4744 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4745 && GET_MODE_INNER (mode) == innermode)
4755 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4756 this may not agree with the mode that the type system has chosen for the
4757 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4758 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4761 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4766 if (orig_mode != BLKmode)
4767 tmp = gen_rtx_REG (orig_mode, regno);
4770 tmp = gen_rtx_REG (mode, regno);
4771 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4772 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4778 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4779 of this code is to classify each 8bytes of incoming argument by the register
4780 class and assign registers accordingly. */
4782 /* Return the union class of CLASS1 and CLASS2.
4783 See the x86-64 PS ABI for details. */
4785 static enum x86_64_reg_class
4786 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4788 /* Rule #1: If both classes are equal, this is the resulting class. */
4789 if (class1 == class2)
4792 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4794 if (class1 == X86_64_NO_CLASS)
4796 if (class2 == X86_64_NO_CLASS)
4799 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4800 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4801 return X86_64_MEMORY_CLASS;
4803 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4804 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4805 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4806 return X86_64_INTEGERSI_CLASS;
4807 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4808 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4809 return X86_64_INTEGER_CLASS;
4811 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4813 if (class1 == X86_64_X87_CLASS
4814 || class1 == X86_64_X87UP_CLASS
4815 || class1 == X86_64_COMPLEX_X87_CLASS
4816 || class2 == X86_64_X87_CLASS
4817 || class2 == X86_64_X87UP_CLASS
4818 || class2 == X86_64_COMPLEX_X87_CLASS)
4819 return X86_64_MEMORY_CLASS;
4821 /* Rule #6: Otherwise class SSE is used. */
4822 return X86_64_SSE_CLASS;
4825 /* Classify the argument of type TYPE and mode MODE.
4826 CLASSES will be filled by the register class used to pass each word
4827 of the operand. The number of words is returned. In case the parameter
4828 should be passed in memory, 0 is returned. As a special case for zero
4829 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4831 BIT_OFFSET is used internally for handling records and specifies offset
4832 of the offset in bits modulo 256 to avoid overflow cases.
4834 See the x86-64 PS ABI for details.
4838 classify_argument (enum machine_mode mode, const_tree type,
4839 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4841 HOST_WIDE_INT bytes =
4842 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4843 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4845 /* Variable sized entities are always passed/returned in memory. */
4849 if (mode != VOIDmode
4850 && targetm.calls.must_pass_in_stack (mode, type))
4853 if (type && AGGREGATE_TYPE_P (type))
4857 enum x86_64_reg_class subclasses[MAX_CLASSES];
4859 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4863 for (i = 0; i < words; i++)
4864 classes[i] = X86_64_NO_CLASS;
4866 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4867 signal the memory class, so handle it as a special case. */
4870 classes[0] = X86_64_NO_CLASS;
4874 /* Classify each field of record and merge classes. */
4875 switch (TREE_CODE (type))
4878 /* And now merge the fields of structure. */
4879 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4881 if (TREE_CODE (field) == FIELD_DECL)
4885 if (TREE_TYPE (field) == error_mark_node)
4888 /* Bitfields are always classified as integer. Handle them
4889 early, since later code would consider them to be
4890 misaligned integers. */
4891 if (DECL_BIT_FIELD (field))
4893 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4894 i < ((int_bit_position (field) + (bit_offset % 64))
4895 + tree_low_cst (DECL_SIZE (field), 0)
4898 merge_classes (X86_64_INTEGER_CLASS,
4903 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4904 TREE_TYPE (field), subclasses,
4905 (int_bit_position (field)
4906 + bit_offset) % 256);
4909 for (i = 0; i < num; i++)
4912 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4914 merge_classes (subclasses[i], classes[i + pos]);
4922 /* Arrays are handled as small records. */
4925 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4926 TREE_TYPE (type), subclasses, bit_offset);
4930 /* The partial classes are now full classes. */
4931 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4932 subclasses[0] = X86_64_SSE_CLASS;
4933 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4934 && !((bit_offset % 64) == 0 && bytes == 4))
4935 subclasses[0] = X86_64_INTEGER_CLASS;
4937 for (i = 0; i < words; i++)
4938 classes[i] = subclasses[i % num];
4943 case QUAL_UNION_TYPE:
4944 /* Unions are similar to RECORD_TYPE but offset is always 0.
4946 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4948 if (TREE_CODE (field) == FIELD_DECL)
4952 if (TREE_TYPE (field) == error_mark_node)
4955 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4956 TREE_TYPE (field), subclasses,
4960 for (i = 0; i < num; i++)
4961 classes[i] = merge_classes (subclasses[i], classes[i]);
4970 /* Final merger cleanup. */
4971 for (i = 0; i < words; i++)
4973 /* If one class is MEMORY, everything should be passed in
4975 if (classes[i] == X86_64_MEMORY_CLASS)
4978 /* The X86_64_SSEUP_CLASS should be always preceded by
4979 X86_64_SSE_CLASS. */
4980 if (classes[i] == X86_64_SSEUP_CLASS
4981 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4982 classes[i] = X86_64_SSE_CLASS;
4984 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4985 if (classes[i] == X86_64_X87UP_CLASS
4986 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4987 classes[i] = X86_64_SSE_CLASS;
4992 /* Compute alignment needed. We align all types to natural boundaries with
4993 exception of XFmode that is aligned to 64bits. */
4994 if (mode != VOIDmode && mode != BLKmode)
4996 int mode_alignment = GET_MODE_BITSIZE (mode);
4999 mode_alignment = 128;
5000 else if (mode == XCmode)
5001 mode_alignment = 256;
5002 if (COMPLEX_MODE_P (mode))
5003 mode_alignment /= 2;
5004 /* Misaligned fields are always returned in memory. */
5005 if (bit_offset % mode_alignment)
5009 /* for V1xx modes, just use the base mode */
5010 if (VECTOR_MODE_P (mode) && mode != V1DImode
5011 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5012 mode = GET_MODE_INNER (mode);
5014 /* Classification of atomic types. */
5019 classes[0] = X86_64_SSE_CLASS;
5022 classes[0] = X86_64_SSE_CLASS;
5023 classes[1] = X86_64_SSEUP_CLASS;
5033 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5037 classes[0] = X86_64_INTEGERSI_CLASS;
5040 else if (size <= 64)
5042 classes[0] = X86_64_INTEGER_CLASS;
5045 else if (size <= 64+32)
5047 classes[0] = X86_64_INTEGER_CLASS;
5048 classes[1] = X86_64_INTEGERSI_CLASS;
5051 else if (size <= 64+64)
5053 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5061 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5068 if (!(bit_offset % 64))
5069 classes[0] = X86_64_SSESF_CLASS;
5071 classes[0] = X86_64_SSE_CLASS;
5074 classes[0] = X86_64_SSEDF_CLASS;
5077 classes[0] = X86_64_X87_CLASS;
5078 classes[1] = X86_64_X87UP_CLASS;
5081 classes[0] = X86_64_SSE_CLASS;
5082 classes[1] = X86_64_SSEUP_CLASS;
5085 classes[0] = X86_64_SSE_CLASS;
5088 classes[0] = X86_64_SSEDF_CLASS;
5089 classes[1] = X86_64_SSEDF_CLASS;
5092 classes[0] = X86_64_COMPLEX_X87_CLASS;
5095 /* This mode is larger than 16 bytes. */
5103 classes[0] = X86_64_AVX_CLASS;
5111 classes[0] = X86_64_SSE_CLASS;
5112 classes[1] = X86_64_SSEUP_CLASS;
5119 classes[0] = X86_64_SSE_CLASS;
5125 gcc_assert (VECTOR_MODE_P (mode));
5130 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5132 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5133 classes[0] = X86_64_INTEGERSI_CLASS;
5135 classes[0] = X86_64_INTEGER_CLASS;
5136 classes[1] = X86_64_INTEGER_CLASS;
5137 return 1 + (bytes > 8);
5141 /* Examine the argument and return set number of register required in each
5142 class. Return 0 iff parameter should be passed in memory. */
5144 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5145 int *int_nregs, int *sse_nregs)
5147 enum x86_64_reg_class regclass[MAX_CLASSES];
5148 int n = classify_argument (mode, type, regclass, 0);
5154 for (n--; n >= 0; n--)
5155 switch (regclass[n])
5157 case X86_64_INTEGER_CLASS:
5158 case X86_64_INTEGERSI_CLASS:
5161 case X86_64_AVX_CLASS:
5162 case X86_64_SSE_CLASS:
5163 case X86_64_SSESF_CLASS:
5164 case X86_64_SSEDF_CLASS:
5167 case X86_64_NO_CLASS:
5168 case X86_64_SSEUP_CLASS:
5170 case X86_64_X87_CLASS:
5171 case X86_64_X87UP_CLASS:
5175 case X86_64_COMPLEX_X87_CLASS:
5176 return in_return ? 2 : 0;
5177 case X86_64_MEMORY_CLASS:
5183 /* Construct container for the argument used by GCC interface. See
5184 FUNCTION_ARG for the detailed description. */
5187 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5188 const_tree type, int in_return, int nintregs, int nsseregs,
5189 const int *intreg, int sse_regno)
5191 /* The following variables hold the static issued_error state. */
5192 static bool issued_sse_arg_error;
5193 static bool issued_sse_ret_error;
5194 static bool issued_x87_ret_error;
5196 enum machine_mode tmpmode;
5198 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5199 enum x86_64_reg_class regclass[MAX_CLASSES];
5203 int needed_sseregs, needed_intregs;
5204 rtx exp[MAX_CLASSES];
5207 n = classify_argument (mode, type, regclass, 0);
5210 if (!examine_argument (mode, type, in_return, &needed_intregs,
5213 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5216 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5217 some less clueful developer tries to use floating-point anyway. */
5218 if (needed_sseregs && !TARGET_SSE)
5222 if (!issued_sse_ret_error)
5224 error ("SSE register return with SSE disabled");
5225 issued_sse_ret_error = true;
5228 else if (!issued_sse_arg_error)
5230 error ("SSE register argument with SSE disabled");
5231 issued_sse_arg_error = true;
5236 /* Likewise, error if the ABI requires us to return values in the
5237 x87 registers and the user specified -mno-80387. */
5238 if (!TARGET_80387 && in_return)
5239 for (i = 0; i < n; i++)
5240 if (regclass[i] == X86_64_X87_CLASS
5241 || regclass[i] == X86_64_X87UP_CLASS
5242 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5244 if (!issued_x87_ret_error)
5246 error ("x87 register return with x87 disabled");
5247 issued_x87_ret_error = true;
5252 /* First construct simple cases. Avoid SCmode, since we want to use
5253 single register to pass this type. */
5254 if (n == 1 && mode != SCmode)
5255 switch (regclass[0])
5257 case X86_64_INTEGER_CLASS:
5258 case X86_64_INTEGERSI_CLASS:
5259 return gen_rtx_REG (mode, intreg[0]);
5260 case X86_64_AVX_CLASS:
5261 case X86_64_SSE_CLASS:
5262 case X86_64_SSESF_CLASS:
5263 case X86_64_SSEDF_CLASS:
5264 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5265 case X86_64_X87_CLASS:
5266 case X86_64_COMPLEX_X87_CLASS:
5267 return gen_rtx_REG (mode, FIRST_STACK_REG);
5268 case X86_64_NO_CLASS:
5269 /* Zero sized array, struct or class. */
5274 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5275 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5276 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5279 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5280 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5281 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5282 && regclass[1] == X86_64_INTEGER_CLASS
5283 && (mode == CDImode || mode == TImode || mode == TFmode)
5284 && intreg[0] + 1 == intreg[1])
5285 return gen_rtx_REG (mode, intreg[0]);
5287 /* Otherwise figure out the entries of the PARALLEL. */
5288 for (i = 0; i < n; i++)
5290 switch (regclass[i])
5292 case X86_64_NO_CLASS:
5294 case X86_64_INTEGER_CLASS:
5295 case X86_64_INTEGERSI_CLASS:
5296 /* Merge TImodes on aligned occasions here too. */
5297 if (i * 8 + 8 > bytes)
5298 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5299 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5303 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5304 if (tmpmode == BLKmode)
5306 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5307 gen_rtx_REG (tmpmode, *intreg),
5311 case X86_64_SSESF_CLASS:
5312 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5313 gen_rtx_REG (SFmode,
5314 SSE_REGNO (sse_regno)),
5318 case X86_64_SSEDF_CLASS:
5319 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5320 gen_rtx_REG (DFmode,
5321 SSE_REGNO (sse_regno)),
5325 case X86_64_SSE_CLASS:
5326 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5330 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5331 gen_rtx_REG (tmpmode,
5332 SSE_REGNO (sse_regno)),
5334 if (tmpmode == TImode)
5343 /* Empty aligned struct, union or class. */
5347 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5348 for (i = 0; i < nexps; i++)
5349 XVECEXP (ret, 0, i) = exp [i];
5353 /* Update the data in CUM to advance over an argument of mode MODE
5354 and data type TYPE. (TYPE is null for libcalls where that information
5355 may not be available.) */
/* Advance CUM past a 32-bit (ia32 ABI) argument of mode MODE and type
   TYPE, which occupies BYTES bytes / WORDS words.
   NOTE(review): this excerpt elides interior lines (the mode switch and
   its braces); comments below describe only the visible fragments.  */
5358 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5359 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-class argument: consume WORDS general registers.  */
5375 cum->words += words;
5376 cum->nregs -= words;
5377 cum->regno += words;
/* If we have run out of general registers, stop using them
   (elided handling follows in the full source).  */
5379 if (cum->nregs <= 0)
/* float_in_sse thresholds: presumably DFmode needs level 2, SFmode
   level 1 — TODO confirm against the elided case labels.  */
5387 if (cum->float_in_sse < 2)
5390 if (cum->float_in_sse < 1)
/* SSE vector argument: only non-aggregates consume an SSE register.  */
5408 if (!type || !AGGREGATE_TYPE_P (type))
5410 cum->sse_words += words;
5411 cum->sse_nregs -= 1;
5412 cum->sse_regno += 1;
5413 if (cum->sse_nregs <= 0)
/* MMX vector argument: likewise, one MMX register per argument.  */
5426 if (!type || !AGGREGATE_TYPE_P (type))
5428 cum->mmx_words += words;
5429 cum->mmx_nregs -= 1;
5430 cum->mmx_regno += 1;
5431 if (cum->mmx_nregs <= 0)
/* Advance CUM past an argument under the x86-64 SysV ABI.  WORDS is the
   argument size in words; NAMED is zero for an unnamed (variadic)
   argument.  (Excerpt: braces and some lines elided.)  */
5442 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5443 tree type, HOST_WIDE_INT words, int named)
5445 int int_nregs, sse_nregs;
5447 /* Unnamed 256bit vector mode parameters are passed on stack. */
5448 if (!named && VALID_AVX256_REG_MODE (mode))
/* If the argument cannot be classified into registers, it goes on the
   stack: just bump the word counter.  */
5451 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5452 cum->words += words;
/* Only use registers when the whole argument fits in the remaining
   integer and SSE registers — no partial register passing.  */
5453 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5455 cum->nregs -= int_nregs;
5456 cum->sse_nregs -= sse_nregs;
5457 cum->regno += int_nregs;
5458 cum->sse_regno += sse_nregs;
/* Otherwise the argument is passed in memory.  */
5461 cum->words += words;
/* Advance CUM past an argument under the Microsoft x64 ABI.  BYTES is
   the argument size; WORDS the size in words.  (Excerpt: the
   register-consuming path visible in the full source is elided here.)  */
5465 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5466 HOST_WIDE_INT words)
5468 /* Otherwise, this should be passed indirect. */
/* MS x64 passes by value only objects of size 1, 2, 4 or 8 bytes;
   anything else must already have been forced to pass-by-reference.  */
5469 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5471 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE worker: compute the argument size and
   dispatch to the ABI-specific helper (MS x64, SysV x86-64, or ia32).  */
5480 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5481 tree type, int named)
5483 HOST_WIDE_INT bytes, words;
/* BLKmode arguments get their size from the type; others from the mode.  */
5485 if (mode == BLKmode)
5486 bytes = int_size_in_bytes (type);
5488 bytes = GET_MODE_SIZE (mode);
/* Round the byte count up to whole words.  */
5489 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* NOTE(review): an elided guard presumably restricts this to vector
   types — confirm against the full source.  */
5492 mode = type_natural_mode (type);
/* CUM may be null (libcalls); fall back to the default ABI then.  */
5494 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5495 function_arg_advance_ms_64 (cum, bytes, words);
5496 else if (TARGET_64BIT)
5497 function_arg_advance_64 (cum, mode, type, words, named);
5499 function_arg_advance_32 (cum, mode, type, bytes, words);
5502 /* Define where to put the arguments to a function.
5503 Value is zero to push the argument on the stack,
5504 or a hard register in which to store the argument.
5506 MODE is the argument's machine mode.
5507 TYPE is the data type of the argument (as a tree).
5508 This is null for libcalls where that information may
5510 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5511 the preceding args and about the function being called.
5512 NAMED is nonzero if this argument is a named parameter
5513 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the register (or parallel) in which a 32-bit ABI argument of
   mode MODE / type TYPE is passed, or presumably NULL_RTX for the stack
   (elided paths — TODO confirm).  ORIG_MODE is the mode before vector
   canonicalization; BYTES/WORDS is the argument size.
   (Excerpt: the mode switch and several case bodies are elided.)  */
5516 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5517 enum machine_mode orig_mode, tree type,
5518 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Emit each "vector arg without ISA enabled" warning only once.  */
5520 static bool warnedavx, warnedsse, warnedmmx;
5522 /* Avoid the AL settings for the Unix64 ABI. */
5523 if (mode == VOIDmode)
/* Integer argument small enough for the remaining registers.  */
5539 if (words <= cum->nregs)
5541 int regno = cum->regno;
5543 /* Fastcall allocates the first two DWORD (SImode) or
5544 smaller arguments to ECX and EDX if it isn't an
5550 || (type && AGGREGATE_TYPE_P (type)))
5553 /* ECX not EAX is the first allocated register. */
5554 if (regno == AX_REG)
5557 return gen_rtx_REG (mode, regno);
/* float_in_sse thresholds (presumably DFmode vs SFmode — confirm).  */
5562 if (cum->float_in_sse < 2)
5565 if (cum->float_in_sse < 1)
5569 /* In 32bit, we pass TImode in xmm registers. */
5576 if (!type || !AGGREGATE_TYPE_P (type))
5578 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5581 warning (0, "SSE vector argument without SSE enabled "
5585 return gen_reg_or_parallel (mode, orig_mode,
5586 cum->sse_regno + FIRST_SSE_REG);
5591 /* In 32bit, we pass OImode in ymm registers. */
5598 if (!type || !AGGREGATE_TYPE_P (type))
5600 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5603 warning (0, "AVX vector argument without AVX enabled "
5607 return gen_reg_or_parallel (mode, orig_mode,
5608 cum->sse_regno + FIRST_SSE_REG)
/* 8-byte vectors go in MMX registers.  */
5617 if (!type || !AGGREGATE_TYPE_P (type))
5619 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5622 warning (0, "MMX vector argument without MMX enabled "
5626 return gen_reg_or_parallel (mode, orig_mode,
5627 cum->mmx_regno + FIRST_MMX_REG);
/* Return the rtx describing where an argument is passed under the
   x86-64 SysV ABI, or presumably NULL for the stack (elided paths).
   (Excerpt: several interior lines are elided.)  */
5636 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5637 enum machine_mode orig_mode, tree type, int named)
5639 static bool warnedavx;
5641 /* Handle a hidden AL argument containing number of registers
5642 for varargs x86-64 functions. */
5643 if (mode == VOIDmode)
/* AL = number of SSE registers used; the limit depends on which ABI
   the callee uses relative to the default ABI.  */
5644 return GEN_INT (cum->maybe_vaarg
5645 ? (cum->sse_nregs < 0
5646 ? (cum->call_abi == DEFAULT_ABI
5648 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5649 : X64_SSE_REGPARM_MAX))
5664 /* In 64bit, we pass TImode in integer registers and OImode on
5666 if (!type || !AGGREGATE_TYPE_P (type))
5668 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5671 warning (0, "AVX vector argument without AVX enabled "
5676 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Classify the argument and build the register container starting at
   the next free integer/SSE parameter registers.  */
5682 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5684 &x86_64_int_parameter_registers [cum->regno],
/* Return the rtx describing where an argument is passed under the
   Microsoft x64 ABI.  BYTES is the argument size; NAMED is zero for
   variadic arguments.  (Excerpt: some lines elided.)  */
5689 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5690 enum machine_mode orig_mode, int named,
5691 HOST_WIDE_INT bytes)
5695 /* Avoid the AL settings for the Unix64 ABI. */
5696 if (mode == VOIDmode)
5699 /* If we've run out of registers, it goes on the stack. */
5700 if (cum->nregs == 0)
/* MS x64 uses a fixed slot-to-register mapping (RCX, RDX, R8, R9).  */
5703 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5705 /* Only floating point modes are passed in anything but integer regs. */
5706 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5709 regno = cum->regno + FIRST_SSE_REG;
5714 /* Unnamed floating parameters are passed in both the
5715 SSE and integer registers. */
5716 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5717 t2 = gen_rtx_REG (mode, regno);
5718 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5719 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5720 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5723 /* Handle aggregated types passed in register. */
5724 if (orig_mode == BLKmode)
/* Small aggregates (<= 8 bytes) are passed by value in an integer
   register of the matching size.  */
5726 if (bytes > 0 && bytes <= 8)
5727 mode = (bytes > 4 ? DImode : SImode);
5728 if (mode == BLKmode)
5732 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG worker: compute size, canonicalize vector
   types to their natural mode, then dispatch by ABI.  */
5736 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5737 tree type, int named)
5739 enum machine_mode mode = omode;
5740 HOST_WIDE_INT bytes, words;
/* BLKmode arguments get their size from the type; others from the mode.  */
5742 if (mode == BLKmode)
5743 bytes = int_size_in_bytes (type);
5745 bytes = GET_MODE_SIZE (mode);
5746 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5748 /* To simplify the code below, represent vector types with a vector mode
5749 even if MMX/SSE are not active. */
5750 if (type && TREE_CODE (type) == VECTOR_TYPE)
5751 mode = type_natural_mode (type);
/* CUM may be null (libcalls); fall back to the default ABI then.  */
5753 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5754 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5755 else if (TARGET_64BIT)
5756 return function_arg_64 (cum, mode, omode, type, named);
5758 return function_arg_32 (cum, mode, omode, type, bytes, words);
5761 /* A C expression that indicates when an argument must be passed by
5762 reference. If nonzero for an argument, a copy of that argument is
5763 made in memory and a pointer to the argument is passed instead of
5764 the argument itself. The pointer is passed in whatever way is
5765 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: return true if an argument of MODE /
   TYPE must be passed by invisible reference.
   (Excerpt: some interior lines are elided.)  */
5768 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5769 enum machine_mode mode ATTRIBUTE_UNUSED,
5770 const_tree type, bool named ATTRIBUTE_UNUSED)
5772 /* See Windows x64 Software Convention. */
5773 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5775 int msize = (int) GET_MODE_SIZE (mode);
5778 /* Arrays are passed by reference. */
5779 if (TREE_CODE (type) == ARRAY_TYPE)
5782 if (AGGREGATE_TYPE_P (type))
5784 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5785 are passed by reference. */
5786 msize = int_size_in_bytes (type);
5790 /* __m128 is passed by reference. */
/* Sizes 1, 2, 4, 8 pass by value under MS x64; everything else is
   by reference (handling of non-listed sizes is elided here).  */
5792 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size == -1) go by reference.  */
5798 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5804 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true if TYPE (or some field of it, recursively) requires
   128-bit alignment for 32-bit argument passing.
   (Excerpt: some interior lines are elided.)  */
5807 contains_aligned_value_p (tree type)
5809 enum machine_mode mode = TYPE_MODE (type);
5810 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5814 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Under-aligned scalar types never force 16-byte alignment.  */
5816 if (TYPE_ALIGN (type) < 128)
5819 if (AGGREGATE_TYPE_P (type))
5821 /* Walk the aggregates recursively. */
5822 switch (TREE_CODE (type))
5826 case QUAL_UNION_TYPE:
5830 /* Walk all the structure fields. */
5831 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5833 if (TREE_CODE (field) == FIELD_DECL
5834 && contains_aligned_value_p (TREE_TYPE (field)))
5841 /* Just for use if some languages pass arrays by value. */
5842 if (contains_aligned_value_p (TREE_TYPE (type)))
5853 /* Gives the alignment boundary, in bits, of an argument with the
5854 specified mode and type. */
/* FUNCTION_ARG_BOUNDARY worker: return the alignment, in bits, of an
   argument of the given MODE and TYPE.
   (Excerpt: some interior lines are elided.)  */
5857 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5862 /* Since canonical type is used for call, we convert it to
5863 canonical type if needed. */
5864 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5865 type = TYPE_CANONICAL (type);
5866 align = TYPE_ALIGN (type);
/* No type: use the mode's natural alignment.  */
5869 align = GET_MODE_ALIGNMENT (mode);
/* Never align an argument to less than the ABI parameter boundary.  */
5870 if (align < PARM_BOUNDARY)
5871 align = PARM_BOUNDARY;
5872 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5873 natural boundaries. */
5874 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5876 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5877 make an exception for SSE modes since these require 128bit
5880 The handling here differs from field_alignment. ICC aligns MMX
5881 arguments to 4 byte boundaries, while structure fields are aligned
5882 to 8 byte boundaries. */
5885 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5886 align = PARM_BOUNDARY;
/* Aggregates: only 16-byte-align if something inside needs it.  */
5890 if (!contains_aligned_value_p (type))
5891 align = PARM_BOUNDARY;
/* Cap at the largest alignment the target ever requires.  */
5894 if (align > BIGGEST_ALIGNMENT)
5895 align = BIGGEST_ALIGNMENT;
5899 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO may hold a function return value.
   (Excerpt: the switch and several cases are elided.)  */
5902 ix86_function_value_regno_p (int regno)
5909 case FIRST_FLOAT_REG:
5910 /* TODO: The function should depend on current function ABI but
5911 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns values in x87 registers.  */
5913 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5915 return TARGET_FLOAT_RETURNS_IN_80387;
/* Presumably the MMX/SSE return-register case — confirm against the
   elided case label.  */
5921 if (TARGET_MACHO || TARGET_64BIT)
5929 /* Define how to find the value returned by a function.
5930 VALTYPE is the data type of the value (as a tree).
5931 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5932 otherwise, FUNC is 0. */
/* Return the rtx for a value of mode MODE returned under the ia32 ABI.
   ORIG_MODE is the pre-canonicalization mode; FNTYPE/FN identify the
   callee when known (for sseregparm handling).  */
5935 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5936 const_tree fntype, const_tree fn)
5940 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5941 we normally prevent this case when mmx is not available. However
5942 some ABIs may require the result to be returned like DImode. */
5943 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5944 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5946 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5947 we prevent this case when sse is not available. However some ABIs
5948 may require the result to be returned like integer TImode. */
5949 else if (mode == TImode
5950 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5951 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5953 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5954 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5955 regno = FIRST_FLOAT_REG;
5957 /* Most things go in %eax. */
5960 /* Override FP return register with %xmm0 for local functions when
5961 SSE math is enabled or for functions with sseregparm attribute. */
5962 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5964 int sse_level = ix86_function_sseregparm (fntype, fn, false);
/* sseregparm level 1 covers SFmode only; level 2 adds DFmode.  */
5965 if ((sse_level >= 1 && mode == SFmode)
5966 || (sse_level == 2 && mode == DFmode))
5967 regno = FIRST_SSE_REG;
5970 return gen_rtx_REG (orig_mode, regno);
/* Return the rtx for a value returned under the x86-64 SysV ABI.
   (Excerpt: the libcall mode dispatch is partially elided.)  */
5974 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5979 /* Handle libcalls, which don't provide a type node. */
5980 if (valtype == NULL)
/* Elided switch on MODE: FP/vector modes in %xmm0, x87 modes in
   %st(0), everything else in %rax.  */
5992 return gen_rtx_REG (mode, FIRST_SSE_REG);
5995 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5999 return gen_rtx_REG (mode, AX_REG);
/* Classify VALTYPE into return registers (in_return == 1).  */
6003 ret = construct_container (mode, orig_mode, valtype, 1,
6004 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6005 x86_64_int_return_registers, 0);
6007 /* For zero sized structures, construct_container returns NULL, but we
6008 need to keep rest of compiler happy by returning meaningful value. */
6010 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return the rtx for a value returned under the Microsoft x64 ABI:
   %rax by default, %xmm0 for scalar FP and 16-byte values.  */
6016 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6018 unsigned int regno = AX_REG;
6022 switch (GET_MODE_SIZE (mode))
/* Presumably the 16-byte case: __m128 and friends return in %xmm0.  */
6025 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6026 && !COMPLEX_MODE_P (mode))
6027 regno = FIRST_SSE_REG;
/* Scalar float/double also return in %xmm0.  */
6031 if (mode == SFmode || mode == DFmode)
6032 regno = FIRST_SSE_REG;
6038 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve the callee's type and dispatch by ABI.  */
6042 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6043 enum machine_mode orig_mode, enum machine_mode mode)
6045 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL, a function type, or NULL.  */
6048 if (fntype_or_decl && DECL_P (fntype_or_decl))
6049 fn = fntype_or_decl;
6050 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6052 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6053 return function_value_ms_64 (orig_mode, mode);
6054 else if (TARGET_64BIT)
6055 return function_value_64 (orig_mode, mode, valtype);
6057 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: where a value of type VALTYPE is
   returned.  OUTGOING is ignored on this target.  */
6061 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6062 bool outgoing ATTRIBUTE_UNUSED)
6064 enum machine_mode mode, orig_mode;
6066 orig_mode = TYPE_MODE (valtype);
/* Canonicalize vector types to the mode actually used for passing.  */
6067 mode = type_natural_mode (valtype);
6068 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE worker: no type node is available, so MODE serves as
   both the original and the natural mode.  */
6072 ix86_libcall_value (enum machine_mode mode)
6074 return ix86_function_value_1 (NULL, NULL, mode, mode);
6077 /* Return true iff type is returned in memory. */
/* Return nonzero if a value of TYPE / MODE is returned in memory under
   the ia32 ABI.  (Excerpt: some interior lines are elided.)  */
6079 static int ATTRIBUTE_UNUSED
6080 return_in_memory_32 (const_tree type, enum machine_mode mode)
6084 if (mode == BLKmode)
6087 size = int_size_in_bytes (type);
/* Some subtargets return small aggregates (<= 8 bytes) in registers.  */
6089 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6092 if (VECTOR_MODE_P (mode) || mode == TImode)
6094 /* User-created vectors small enough to fit in EAX. */
6098 /* MMX/3dNow values are returned in MM0,
6099 except when it doesn't exist. */
6101 return (TARGET_MMX ? 0 : 1);
6103 /* SSE values are returned in XMM0, except when it doesn't exist. */
6105 return (TARGET_SSE ? 0 : 1);
/* Return nonzero if a value of TYPE / MODE is returned in memory under
   the x86-64 SysV ABI, i.e. when it cannot be classified into
   registers.  */
6117 static int ATTRIBUTE_UNUSED
6118 return_in_memory_64 (const_tree type, enum machine_mode mode)
6119 int needed_intregs, needed_sseregs;
6120 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Return nonzero if a value of TYPE / MODE is returned in memory under
   the Microsoft x64 ABI.  */
6123 static int ATTRIBUTE_UNUSED
6124 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6126 HOST_WIDE_INT size = int_size_in_bytes (type);
6128 /* __m128 is returned in xmm0. */
6129 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6130 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6133 /* Otherwise, the size must be exactly in [1248]. */
6134 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the subtarget override or
   the appropriate per-ABI helper.  */
6138 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6140 #ifdef SUBTARGET_RETURN_IN_MEMORY
6141 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6143 const enum machine_mode mode = type_natural_mode (type);
6145 if (TARGET_64BIT_MS_ABI)
6146 return return_in_memory_ms_64 (type, mode);
6147 else if (TARGET_64BIT)
6148 return return_in_memory_64 (type, mode);
6150 return return_in_memory_32 (type, mode);
6154 /* Return false iff TYPE is returned in memory. This version is used
6155 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6156 but differs notably in that when MMX is available, 8-byte vectors
6157 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of TARGET_RETURN_IN_MEMORY; see the comment above
   for how it differs from ix86_return_in_memory.
   (Excerpt: some interior lines are elided.)  */
6160 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6163 enum machine_mode mode = type_natural_mode (type);
/* 64-bit follows the common SysV classification.  */
6166 return return_in_memory_64 (type, mode);
6168 if (mode == BLKmode)
6171 size = int_size_in_bytes (type);
6173 if (VECTOR_MODE_P (mode))
6175 /* Return in memory only if MMX registers *are* available. This
6176 seems backwards, but it is consistent with the existing
/* TImode and XFmode have special handling (bodies elided here).  */
6183 else if (mode == TImode)
6185 else if (mode == XFmode)
6191 /* When returning SSE vector types, we have a choice of either
6192 (1) being abi incompatible with a -march switch, or
6193 (2) generating an error.
6194 Given no good solution, I think the safest thing is one warning.
6195 The user won't be able to use -Werror, but....
6197 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6198 called in response to actually generating a caller or callee that
6199 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6200 via aggregate_value_p for general type probing from tree-ssa. */
/* TARGET_STRUCT_VALUE_RTX worker: also the place where we warn, once,
   about SSE/MMX vector returns with the corresponding ISA disabled
   (see the rationale in the comment above this function).  */
6203 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6205 static bool warnedsse, warnedmmx;
6207 if (!TARGET_64BIT && type)
6209 /* Look at the return type of the function, not the function type. */
6210 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6212 if (!TARGET_SSE && !warnedsse)
6215 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6218 warning (0, "SSE vector return without SSE enabled "
/* 8-byte vectors would be returned in %mm0 — warn if MMX is off.  */
6223 if (!TARGET_MMX && !warnedmmx)
6225 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6228 warning (0, "MMX vector return without MMX enabled "
6238 /* Create the va_list data type. */
6240 /* Returns the calling convention specific va_list data type.
6241 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for calling convention ABI: a plain char* for
   ia32 and MS x64, or the four-field SysV record wrapped in a
   one-element array.  */
6244 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6246 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6248 /* For i386 we use plain pointer to argument area. */
6249 if (!TARGET_64BIT || abi == MS_ABI)
6250 return build_pointer_type (char_type_node);
6252 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6253 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
/* gp_offset / fp_offset: byte offsets into the register save area.  */
6255 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6256 unsigned_type_node);
6257 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6258 unsigned_type_node);
6259 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6261 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the middle end can track va_list use.  */
6264 va_list_gpr_counter_field = f_gpr;
6265 va_list_fpr_counter_field = f_fpr;
6267 DECL_FIELD_CONTEXT (f_gpr) = record;
6268 DECL_FIELD_CONTEXT (f_fpr) = record;
6269 DECL_FIELD_CONTEXT (f_ovf) = record;
6270 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
6272 TREE_CHAIN (record) = type_decl;
6273 TYPE_NAME (record) = type_decl;
6274 TYPE_FIELDS (record) = f_gpr;
6275 TREE_CHAIN (f_gpr) = f_fpr;
6276 TREE_CHAIN (f_fpr) = f_ovf;
6277 TREE_CHAIN (f_ovf) = f_sav;
6279 layout_type (record);
6281 /* The correct type is an array type of one element. */
6282 return build_array_type (record, build_index_type (size_zero_node));
6285 /* Setup the builtin va_list data type and for 64-bit the additional
6286 calling convention specific va_list data types. */
/* TARGET_BUILD_BUILTIN_VA_LIST worker: build the default va_list type
   and cache variant copies for the non-default ABI as well.
   (Excerpt: some guard lines are elided.)  */
6289 ix86_build_builtin_va_list (void)
6291 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6293 /* Initialize abi specific va_list builtin types. */
6297 if (DEFAULT_ABI == MS_ABI)
/* Default is MS: build the SysV variant separately.  */
6299 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* A variant copy keeps the cached type distinct from the main one.  */
6300 if (TREE_CODE (t) != RECORD_TYPE)
6301 t = build_variant_type_copy (t);
6302 sysv_va_list_type_node = t;
/* Presumably the else-branch reuses RET for the SysV node — confirm
   against the elided lines.  */
6307 if (TREE_CODE (t) != RECORD_TYPE)
6308 t = build_variant_type_copy (t);
6309 sysv_va_list_type_node = t;
6311 if (DEFAULT_ABI != MS_ABI)
6313 t = ix86_build_builtin_va_list_abi (MS_ABI);
6314 if (TREE_CODE (t) != RECORD_TYPE)
6315 t = build_variant_type_copy (t);
6316 ms_va_list_type_node = t;
6321 if (TREE_CODE (t) != RECORD_TYPE)
6322 t = build_variant_type_copy (t);
6323 ms_va_list_type_node = t;
6330 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit the prologue code that dumps unnamed-argument registers into the
   register save area for a SysV x86-64 varargs function.
   (Excerpt: some interior lines are elided.)  */
6333 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6342 int regparm = ix86_regparm;
/* Cross-ABI call: use the other ABI's register-parameter limits.  */
6344 if (cum->call_abi != DEFAULT_ABI)
6345 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6347 /* GPR size of varargs save area. */
6348 if (cfun->va_list_gpr_size)
6349 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6351 ix86_varargs_gpr_size = 0;
6353 /* FPR size of varargs save area. We don't need it if we don't pass
6354 anything in SSE registers. */
6355 if (cum->sse_nregs && cfun->va_list_fpr_size)
6356 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6358 ix86_varargs_fpr_size = 0;
/* Nothing to save: no save area needed at all.  */
6360 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6363 save_area = frame_pointer_rtx;
6364 set = get_varargs_alias_set ();
/* Store each unnamed integer-parameter register to its save slot.  */
6366 for (i = cum->regno;
6368 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6371 mem = gen_rtx_MEM (Pmode,
6372 plus_constant (save_area, i * UNITS_PER_WORD));
6373 MEM_NOTRAP_P (mem) = 1;
6374 set_mem_alias_set (mem, set);
6375 emit_move_insn (mem, gen_rtx_REG (Pmode,
6376 x86_64_int_parameter_registers[i]));
6379 if (ix86_varargs_fpr_size)
6381 /* Now emit code to save SSE registers. The AX parameter contains number
6382 of SSE parameter registers used to call this function. We use
6383 sse_prologue_save insn template that produces computed jump across
6384 SSE saves. We need some preparation work to get this working. */
6386 label = gen_label_rtx ();
6387 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6389 /* Compute address to jump to :
6390 label - eax*4 + nnamed_sse_arguments*4 Or
6391 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6392 tmp_reg = gen_reg_rtx (Pmode);
6393 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the number of SSE registers actually used by the caller.  */
6394 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6395 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6396 gen_rtx_MULT (Pmode, nsse_reg,
6399 /* vmovaps is one byte longer than movaps. */
6401 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6402 gen_rtx_PLUS (Pmode, tmp_reg,
6408 gen_rtx_CONST (DImode,
6409 gen_rtx_PLUS (DImode,
6411 GEN_INT (cum->sse_regno
6412 * (TARGET_AVX ? 5 : 4)))));
6414 emit_move_insn (nsse_reg, label_ref);
6415 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6417 /* Compute address of memory block we save into. We always use pointer
6418 pointing 127 bytes after first byte to store - this is needed to keep
6419 instruction size limited by 4 bytes (5 bytes for AVX) with one
6420 byte displacement. */
6421 tmp_reg = gen_reg_rtx (Pmode);
6422 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6423 plus_constant (save_area,
6424 ix86_varargs_gpr_size + 127)));
6425 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6426 MEM_NOTRAP_P (mem) = 1;
6427 set_mem_alias_set (mem, set);
6428 set_mem_align (mem, BITS_PER_WORD);
6430 /* And finally do the dirty job! */
6431 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6432 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill the remaining named-parameter
   registers into their caller-allocated home slots on the stack.  */
6437 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6439 alias_set_type set = get_varargs_alias_set ();
/* Every register slot from the first unnamed argument onward.  */
6442 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6446 mem = gen_rtx_MEM (Pmode,
6447 plus_constant (virtual_incoming_args_rtx,
6448 i * UNITS_PER_WORD));
6449 MEM_NOTRAP_P (mem) = 1;
6450 set_mem_alias_set (mem, set);
6452 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6453 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance past the last named
   argument and dispatch to the ABI-specific save-area setup.
   (Excerpt: some guard lines are elided.)  */
6458 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6459 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6462 CUMULATIVE_ARGS next_cum;
6465 /* This argument doesn't appear to be used anymore. Which is good,
6466 because the old code here didn't suppress rtl generation. */
6467 gcc_assert (!no_rtl);
6472 fntype = TREE_TYPE (current_function_decl);
6474 /* For varargs, we do not want to skip the dummy va_dcl argument.
6475 For stdargs, we do want to skip the last named argument. */
/* NOTE(review): next_cum is presumably initialized from *cum in an
   elided line before this advance — confirm against the full source.  */
6477 if (stdarg_p (fntype))
6478 function_arg_advance (&next_cum, mode, type, 1);
6480 if (cum->call_abi == MS_ABI)
6481 setup_incoming_varargs_ms_64 (&next_cum);
6483 setup_incoming_varargs_64 (&next_cum);
6486 /* Checks if TYPE is of kind va_list char *. */
/* Return true if TYPE is a va_list that is just a char pointer (ia32,
   and MS x64), so the generic va_start/va_arg expansion applies.  */
6489 is_va_list_char_pointer (tree type)
6493 /* For 32-bit it is always true. */
/* Compare against the canonical va_list type for the current ABI.  */
6496 canonic = ix86_canonical_va_list_type (type);
6497 return (canonic == ms_va_list_type_node
6498 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6501 /* Implement va_start. */
/* TARGET_EXPAND_BUILTIN_VA_START worker: initialize the four SysV
   va_list fields (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) from the current function's argument info.  */
6504 ix86_va_start (tree valist, rtx nextarg)
6506 HOST_WIDE_INT words, n_gpr, n_fpr;
6507 tree f_gpr, f_fpr, f_ovf, f_sav;
6508 tree gpr, fpr, ovf, sav, t;
6511 /* Only 64bit target needs something special. */
6512 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6514 std_expand_builtin_va_start (valist, nextarg);
/* Field decls of the SysV __va_list_tag record, in layout order.  */
6518 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6519 f_fpr = TREE_CHAIN (f_gpr);
6520 f_ovf = TREE_CHAIN (f_fpr);
6521 f_sav = TREE_CHAIN (f_ovf);
/* COMPONENT_REFs for each field of *valist.  */
6523 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6524 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6525 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6526 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6527 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6529 /* Count number of gp and fp argument registers used. */
6530 words = crtl->args.info.words;
6531 n_gpr = crtl->args.info.regno;
6532 n_fpr = crtl->args.info.sse_regno;
6534 if (cfun->va_list_gpr_size)
6536 type = TREE_TYPE (gpr);
/* gp_offset = 8 bytes per general register already consumed.  */
6537 t = build2 (MODIFY_EXPR, type,
6538 gpr, build_int_cst (type, n_gpr * 8));
6539 TREE_SIDE_EFFECTS (t) = 1;
6540 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6543 if (TARGET_SSE && cfun->va_list_fpr_size)
6545 type = TREE_TYPE (fpr);
/* fp_offset = 16 bytes per SSE register, after the GPR area.  */
6546 t = build2 (MODIFY_EXPR, type, fpr,
6547 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6548 TREE_SIDE_EFFECTS (t) = 1;
6549 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6552 /* Find the overflow area. */
6553 type = TREE_TYPE (ovf);
6554 t = make_tree (type, crtl->args.internal_arg_pointer);
6556 t = build2 (POINTER_PLUS_EXPR, type, t,
6557 size_int (words * UNITS_PER_WORD));
6558 t = build2 (MODIFY_EXPR, type, ovf, t);
6559 TREE_SIDE_EFFECTS (t) = 1;
6560 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6562 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6564 /* Find the register save area.
6565 Prologue of the function save it right above stack frame. */
6566 type = TREE_TYPE (sav);
6567 t = make_tree (type, frame_pointer_rtx);
/* If no GPRs were saved, the save area starts at the FPR section,
   so bias backwards past the (absent) GPR slots.  */
6568 if (!ix86_varargs_gpr_size)
6569 t = build2 (POINTER_PLUS_EXPR, type, t,
6570 size_int (-8 * X86_64_REGPARM_MAX));
6571 t = build2 (MODIFY_EXPR, type, sav, t);
6572 TREE_SIDE_EFFECTS (t) = 1;
6573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6577 /* Implement va_arg. */
6580 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6583 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6584 tree f_gpr, f_fpr, f_ovf, f_sav;
6585 tree gpr, fpr, ovf, sav, t;
6587 tree lab_false, lab_over = NULL_TREE;
6592 enum machine_mode nat_mode;
6595 /* Only 64bit target needs something special. */
6596 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6597 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6599 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6600 f_fpr = TREE_CHAIN (f_gpr);
6601 f_ovf = TREE_CHAIN (f_fpr);
6602 f_sav = TREE_CHAIN (f_ovf);
6604 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6605 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6606 valist = build_va_arg_indirect_ref (valist);
6607 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6608 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6609 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6611 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6613 type = build_pointer_type (type);
6614 size = int_size_in_bytes (type);
6615 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6617 nat_mode = type_natural_mode (type);
6626 /* Unnamed 256bit vector mode parameters are passed on stack. */
6627 if (ix86_cfun_abi () == SYSV_ABI)
6634 container = construct_container (nat_mode, TYPE_MODE (type),
6635 type, 0, X86_64_REGPARM_MAX,
6636 X86_64_SSE_REGPARM_MAX, intreg,
6641 /* Pull the value out of the saved registers. */
6643 addr = create_tmp_var (ptr_type_node, "addr");
6644 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6648 int needed_intregs, needed_sseregs;
6650 tree int_addr, sse_addr;
6652 lab_false = create_artificial_label ();
6653 lab_over = create_artificial_label ();
6655 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6657 need_temp = (!REG_P (container)
6658 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6659 || TYPE_ALIGN (type) > 128));
6661 /* In case we are passing structure, verify that it is consecutive block
6662 on the register save area. If not we need to do moves. */
6663 if (!need_temp && !REG_P (container))
6665 /* Verify that all registers are strictly consecutive */
6666 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6670 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6672 rtx slot = XVECEXP (container, 0, i);
6673 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6674 || INTVAL (XEXP (slot, 1)) != i * 16)
6682 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6684 rtx slot = XVECEXP (container, 0, i);
6685 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6686 || INTVAL (XEXP (slot, 1)) != i * 8)
6698 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6699 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6700 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6701 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6704 /* First ensure that we fit completely in registers. */
6707 t = build_int_cst (TREE_TYPE (gpr),
6708 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6709 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6710 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6711 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6712 gimplify_and_add (t, pre_p);
6716 t = build_int_cst (TREE_TYPE (fpr),
6717 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6718 + X86_64_REGPARM_MAX * 8);
6719 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6720 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6721 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6722 gimplify_and_add (t, pre_p);
6725 /* Compute index to start of area used for integer regs. */
6728 /* int_addr = gpr + sav; */
6729 t = fold_convert (sizetype, gpr);
6730 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6731 gimplify_assign (int_addr, t, pre_p);
6735 /* sse_addr = fpr + sav; */
6736 t = fold_convert (sizetype, fpr);
6737 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6738 gimplify_assign (sse_addr, t, pre_p);
6743 tree temp = create_tmp_var (type, "va_arg_tmp");
6746 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6747 gimplify_assign (addr, t, pre_p);
6749 for (i = 0; i < XVECLEN (container, 0); i++)
6751 rtx slot = XVECEXP (container, 0, i);
6752 rtx reg = XEXP (slot, 0);
6753 enum machine_mode mode = GET_MODE (reg);
6754 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6755 tree addr_type = build_pointer_type (piece_type);
6758 tree dest_addr, dest;
6760 if (SSE_REGNO_P (REGNO (reg)))
6762 src_addr = sse_addr;
6763 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6767 src_addr = int_addr;
6768 src_offset = REGNO (reg) * 8;
6770 src_addr = fold_convert (addr_type, src_addr);
6771 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6772 size_int (src_offset));
6773 src = build_va_arg_indirect_ref (src_addr);
6775 dest_addr = fold_convert (addr_type, addr);
6776 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6777 size_int (INTVAL (XEXP (slot, 1))));
6778 dest = build_va_arg_indirect_ref (dest_addr);
6780 gimplify_assign (dest, src, pre_p);
6786 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6787 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6788 gimplify_assign (gpr, t, pre_p);
6793 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6794 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6795 gimplify_assign (fpr, t, pre_p);
6798 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6800 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6803 /* ... otherwise out of the overflow area. */
6805 /* When we align parameter on stack for caller, if the parameter
6806 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6807 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6808 here with caller. */
6809 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6810 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6811 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6813 /* Care for on-stack alignment if needed. */
6814 if (arg_boundary <= 64
6815 || integer_zerop (TYPE_SIZE (type)))
6819 HOST_WIDE_INT align = arg_boundary / 8;
6820 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6821 size_int (align - 1));
6822 t = fold_convert (sizetype, t);
6823 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6825 t = fold_convert (TREE_TYPE (ovf), t);
6827 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6828 gimplify_assign (addr, t, pre_p);
6830 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6831 size_int (rsize * UNITS_PER_WORD));
6832 gimplify_assign (unshare_expr (ovf), t, pre_p);
6835 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6837 ptrtype = build_pointer_type (type);
6838 addr = fold_convert (ptrtype, addr);
6841 addr = build_va_arg_indirect_ref (addr);
6842 return build_va_arg_indirect_ref (addr);
6845 /* Return nonzero if OPNUM's MEM should be matched
6846 in movabs* patterns. */
/* NOTE(review): the return-type line, opening brace and the declarations
   of `set' and `mem' are elided in this extraction.  */
6849 ix86_check_movabs (rtx insn, int opnum)
6853 set = PATTERN (insn);
/* A PARALLEL wraps the SET together with clobbers; element 0 holds the
   actual SET.  */
6854 if (GET_CODE (set) == PARALLEL)
6855 set = XVECEXP (set, 0, 0);
6856 gcc_assert (GET_CODE (set) == SET);
/* OPNUM selects either the destination (0) or source (1) of the SET.  */
6857 mem = XEXP (set, opnum);
/* Strip any number of SUBREGs to reach the underlying MEM.  */
6858 while (GET_CODE (mem) == SUBREG)
6859 mem = SUBREG_REG (mem);
6860 gcc_assert (MEM_P (mem));
/* Volatile memory operands match only when volatile_ok permits them.  */
6861 return (volatile_ok || !MEM_VOLATILE_P (mem));
6864 /* Initialize the table of extra 80387 mathematical constants.
     Fills ext_80387_constants_table[0..4] with the REAL_VALUE_TYPE
     equivalents of the constants loadable by the x87 fldlg2, fldln2,
     fldl2e, fldl2t and fldpi instructions, then marks the table as
     initialized via ext_80387_constants_init.  */
6867 init_ext_80387_constants (void)
/* Decimal expansions of log10(2), ln(2), log2(e), log2(10) and pi,
   one per special x87 load instruction.  */
6869 static const char * cst[5] =
6871 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6872 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6873 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6874 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6875 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6879 for (i = 0; i < 5; i++)
6881 real_from_string (&ext_80387_constants_table[i], cst[i]);
6882 /* Ensure each constant is rounded to XFmode precision.  The x87
   loads produce 80-bit extended values, so comparisons against the
   table must be done at that precision.  */
6883 real_convert (&ext_80387_constants_table[i],
6884 XFmode, &ext_80387_constants_table[i]);
/* Remember that the table is valid so callers can lazily initialize.  */
6887 ext_80387_constants_init = 1;
6890 /* Return true if the constant is something that can be loaded with
6891 a special instruction.
   NOTE(review): the actual return statements are elided in this
   extraction; the visible tests classify X against the constants the
   x87 can materialize directly (0.0, 1.0, and the table filled in by
   init_ext_80387_constants).  */
6894 standard_80387_constant_p (rtx x)
6896 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs are candidates.  */
6900 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* fldz / fld1 handle the shared 0.0 and 1.0 rtx objects.  */
6903 if (x == CONST0_RTX (mode))
6905 if (x == CONST1_RTX (mode))
6908 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6910 /* For XFmode constants, try to find a special 80387 instruction when
6911 optimizing for size or on those CPUs that benefit from them. */
6913 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the table of fldlg2/fldln2/fldl2e/fldl2t/fldpi values.  */
6917 if (! ext_80387_constants_init)
6918 init_ext_80387_constants ();
6920 for (i = 0; i < 5; i++)
6921 if (real_identical (&r, &ext_80387_constants_table[i]))
6925 /* Load of the constant -0.0 or -1.0 will be split as
6926 fldz;fchs or fld1;fchs sequence. */
6927 if (real_isnegzero (&r))
6929 if (real_identical (&r, &dconstm1))
6935 /* Return the opcode of the special instruction to be used to load
   the 80387 constant X (the mnemonic string for fldz/fld1/fldlg2/...).
   NOTE(review): the switch cases mapping standard_80387_constant_p's
   return value to opcode strings are elided in this extraction.  */
6939 standard_80387_constant_opcode (rtx x)
/* Dispatch on the classification computed by standard_80387_constant_p.  */
6941 switch (standard_80387_constant_p (x))
6965 /* Return the CONST_DOUBLE representing the 80387 constant that is
6966 loaded by the specified special instruction. The argument IDX
6967 matches the return value from standard_80387_constant_p. */
6970 standard_80387_constant_rtx (int idx)
/* Make sure the constant table is populated before indexing it.  */
6974 if (! ext_80387_constants_init)
6975 init_ext_80387_constants ();
/* NOTE(review): the switch translating IDX into table index `i' is
   elided in this extraction.  */
6991 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6995 /* Return 1 if mode is a valid mode for sse.
   NOTE(review): the function body (a mode switch, presumably) is elided
   in this extraction; only the signature is visible.  */
6997 standard_sse_mode_p (enum machine_mode mode)
7014 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7015 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7016 modes and AVX is enabled.
   Negative values (-2/-3) flag an all-ones constant whose required ISA
   extension is NOT enabled.  */
7019 standard_sse_constant_p (rtx x)
7021 enum machine_mode mode = GET_MODE (x);
/* All-zero vectors can always be materialized with a xor idiom.  */
7023 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones vectors need pcmpeqd (SSE2) or vpcmpeqd (AVX).  */
7025 if (vector_all_ones_operand (x, mode))
7027 if (standard_sse_mode_p (mode))
7028 return TARGET_SSE2 ? 2 : -2;
7029 else if (VALID_AVX256_REG_MODE (mode))
7030 return TARGET_AVX ? 3 : -3;
7036 /* Return the opcode of the special instruction to be used to load
   the standard SSE constant X into the destination of INSN: a xor
   idiom for all-zeros, a compare-equal idiom for all-ones.  The exact
   mnemonic depends on the insn's mode attribute and on TARGET_AVX.
   NOTE(review): case labels and closing braces are elided in this
   extraction.  */
7040 standard_sse_constant_opcode (rtx insn, rtx x)
7042 switch (standard_sse_constant_p (x))
/* All-zeros: pick the xor flavor matching the operand mode.  */
7045 switch (get_attr_mode (insn))
7048 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7050 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7052 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
/* 256-bit modes use the VEX three-operand forms on the xmm view.  */
7054 return "vxorps\t%x0, %x0, %x0";
7056 return "vxorpd\t%x0, %x0, %x0";
7058 return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare a register against itself.  */
7064 switch (get_attr_mode (insn))
7069 return "vpcmpeqd\t%0, %0, %0";
7075 return "pcmpeqd\t%0, %0";
7080 /* Returns 1 if OP contains a symbol reference, walking the rtx
   recursively.  NOTE(review): the declarations of `fmt'/`i'/`j', the
   braces and the return statements are elided in this extraction.  */
7083 symbolic_reference_mentioned_p (rtx op)
/* Direct hit: OP itself is a symbol or label reference.  */
7088 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Otherwise scan every operand of OP according to its rtx format.  */
7091 fmt = GET_RTX_FORMAT (GET_CODE (op));
7092 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors: recurse into each element.  */
7098 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7099 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
/* 'e' operands are single sub-expressions.  */
7103 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7110 /* Return 1 if it is appropriate to emit `ret' instructions in the
7111 body of a function. Do this only if the epilogue is simple, needing a
7112 couple of insns. Prior to reloading, we can't tell how many registers
7113 must be saved, so return 0 then. Return 0 if there is no frame
7114 marker to de-allocate. */
7117 ix86_can_use_return_insn_p (void)
7119 struct ix86_frame frame;
/* Before reload the frame layout is unknown; with a frame pointer the
   epilogue needs more than a bare ret.  */
7121 if (! reload_completed || frame_pointer_needed)
7124 /* Don't allow more than 32 pop, since that's all we can do
7125 with one instruction.
   (`ret N' takes a 16-bit immediate, hence the 32768-byte cap.)  */
7126 if (crtl->args.pops_args
7127 && crtl->args.size >= 32768)
/* A bare ret works only when nothing was allocated and no registers
   were saved.  */
7130 ix86_compute_frame_layout (&frame);
7131 return frame.to_allocate == 0 && frame.nregs == 0;
7134 /* Value should be nonzero if functions must have frame pointers.
7135 Zero means the frame pointer need not be set up (and parms may
7136 be accessed via the stack pointer) in functions that seem suitable. */
7139 ix86_frame_pointer_required (void)
7141 /* If we accessed previous frames, then the generated code expects
7142 to be able to access the saved ebp value in our frame. */
7143 if (cfun->machine->accesses_prev_frame)
7146 /* Several x86 os'es need a frame pointer for other reasons,
7147 usually pertaining to setjmp. */
7148 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7151 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7152 the frame pointer by default. Turn it back on now if we've not
7153 got a leaf function. */
/* TLS-descriptor calls behave like real calls for this purpose.  */
7154 if (TARGET_OMIT_LEAF_FRAME_POINTER
7155 && (!current_function_is_leaf
7156 || ix86_current_function_calls_tls_descriptor))
7165 /* Record that the current function accesses previous call frames.
   Setting this flag forces ix86_frame_pointer_required to keep %ebp.  */
7168 ix86_setup_frame_addresses (void)
7170 cfun->machine->accesses_prev_frame = 1;
7173 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7174 # define USE_HIDDEN_LINKONCE 1
7176 # define USE_HIDDEN_LINKONCE 0
7179 static int pic_labels_used;
7181 /* Fills in the label name that should be used for a pc thunk for
7182 the given register. NAME must have room for at least 32 bytes.  */
7185 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are a 32-bit PIC mechanism only.  */
7187 gcc_assert (!TARGET_64BIT)
/* With hidden/linkonce support use the well-known shareable name;
   otherwise fall back to an internal per-register label.  */
7189 if (USE_HIDDEN_LINKONCE)
7190 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7192 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7196 /* This function generates code for -fpic that loads %ebx with
7197 the return address of the caller and then returns.
   Emitted at end of file: one get_pc thunk per register recorded in
   pic_labels_used.  NOTE(review): several brace/conditional lines are
   elided in this extraction (e.g. the TARGET_MACHO vs. hidden-linkonce
   selection).  */
7200 ix86_file_end (void)
/* Only the eight 32-bit integer registers can host a PIC thunk.  */
7205 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which output_set_got never requested a thunk.  */
7209 if (! ((pic_labels_used >> regno) & 1))
7212 get_pc_thunk_name (name, regno);
/* Mach-O branch: emit the thunk as a weak, private-extern coalesced
   definition so duplicates across objects fold together.  */
7217 switch_to_section (darwin_sections[text_coal_section]);
7218 fputs ("\t.weak_definition\t", asm_out_file);
7219 assemble_name (asm_out_file, name);
7220 fputs ("\n\t.private_extern\t", asm_out_file);
7221 assemble_name (asm_out_file, name);
7222 fputs ("\n", asm_out_file);
7223 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF branch: emit a one-only, hidden function so the linker keeps a
   single copy.  */
7227 if (USE_HIDDEN_LINKONCE)
7231 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7233 TREE_PUBLIC (decl) = 1;
7234 TREE_STATIC (decl) = 1;
7235 DECL_ONE_ONLY (decl) = 1;
7237 (*targetm.asm_out.unique_section) (decl, 0);
7238 switch_to_section (get_named_section (decl, NULL, 0));
7240 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7241 fputs ("\t.hidden\t", asm_out_file);
7242 assemble_name (asm_out_file, name);
7243 fputc ('\n', asm_out_file);
7244 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: plain label in the regular text section.  */
7248 switch_to_section (text_section);
7249 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address from the stack into the target
   register and return.  */
7252 xops[0] = gen_rtx_REG (Pmode, regno);
7253 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7254 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7255 output_asm_insn ("ret", xops);
/* Mark the stack non-executable where the target requires a note.  */
7258 if (NEED_INDICATE_EXEC_STACK)
7259 file_end_indicate_exec_stack ();
7262 /* Emit code for the SET_GOT patterns: load the GOT base address into
   DEST.  LABEL, when non-NULL, is the label to anchor the PC-relative
   computation on.  NOTE(review): the xops[] declarations, braces and
   some TARGET_MACHO conditionals are elided in this extraction.  */
7265 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* VxWorks RTP: the GOT pointer lives behind two fixed symbols instead
   of being computed PC-relatively.  */
7271 if (TARGET_VXWORKS_RTP && flag_pic)
7273 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7274 xops[2] = gen_rtx_MEM (Pmode,
7275 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7276 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7278 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7279 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7280 an unadorned address. */
7281 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7282 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7283 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7287 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC) use the classic
   call/pop sequence to obtain the PC.  */
7289 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7291 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7294 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7296 output_asm_insn ("call\t%a2", xops);
7299 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7300 is what will be referenced by the Mach-O PIC subsystem. */
7302 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7305 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7306 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* The called label falls through to here; pop retrieves the PC.  */
7309 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a shared per-register thunk so the
   return-stack predictor stays balanced; record that the thunk must be
   emitted by ix86_file_end.  */
7314 get_pc_thunk_name (name, REGNO (dest));
7315 pic_labels_used |= 1 << REGNO (dest);
7317 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7318 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7319 output_asm_insn ("call\t%X2", xops);
7320 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7321 is what will be referenced by the Mach-O PIC subsystem. */
7324 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7326 targetm.asm_out.internal_label (asm_out_file, "L",
7327 CODE_LABEL_NUMBER (label));
/* Finally add _GLOBAL_OFFSET_TABLE_ (possibly adjusted by the distance
   from the anchor label) to form the GOT base.  */
7334 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7335 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7337 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7342 /* Generate an "push" pattern for input ARG: a SET storing ARG through
   a pre-decremented stack pointer.  NOTE(review): the function header
   and the MEM-construction line are elided in this extraction.  */
7347 return gen_rtx_SET (VOIDmode,
7349 gen_rtx_PRE_DEC (Pmode,
7350 stack_pointer_rtx)),
7354 /* Return >= 0 if there is an unused call-clobbered register available
7355 for the entire function; INVALID_REGNUM otherwise.  Used to hold the
   PIC base without forcing a save of %ebx.  */
7358 ix86_select_alt_pic_regnum (void)
/* Only safe in a leaf function with no profiling and no TLS-descriptor
   calls, since those may clobber call-clobbered registers.  */
7360 if (current_function_is_leaf && !crtl->profile
7361 && !ix86_current_function_calls_tls_descriptor)
7364 /* Can't use the same register for both PIC and DRAP. */
7366 drap = REGNO (crtl->drap_reg);
/* Scan %eax/%ecx/%edx (regnos 2..0) for one that is never live.  */
7369 for (i = 2; i >= 0; --i)
7370 if (i != drap && !df_regs_ever_live_p (i))
7374 return INVALID_REGNUM;
7377 /* Return 1 if we need to save REGNO in the prologue.  MAYBE_EH_RETURN
   selects whether EH return data registers count as needing a save.  */
7379 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved whenever it is actually used, unless
   an alternate call-clobbered register can hold the PIC base.  */
7381 if (pic_offset_table_rtx
7382 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7383 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7385 || crtl->calls_eh_return
7386 || crtl->uses_const_pool))
7388 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* For __builtin_eh_return, the EH data registers must be preserved.  */
7393 if (crtl->calls_eh_return && maybe_eh_return)
7398 unsigned test = EH_RETURN_DATA_REGNO (i);
7399 if (test == INVALID_REGNUM)
/* The DRAP register (dynamic realign argument pointer) is saved by the
   realignment prologue itself.  */
7407 && regno == REGNO (crtl->drap_reg))
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is already maintained.  */
7410 return (df_regs_ever_live_p (regno)
7411 && !call_used_regs[regno]
7412 && !fixed_regs[regno]
7413 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7416 /* Return number of registers to be saved on the stack, counting every
   hard register for which ix86_save_reg answers true.  */
7419 ix86_nsaved_regs (void)
7424 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7425 if (ix86_save_reg (regno, true))
7430 /* Given FROM and TO register numbers, say whether this elimination is
7431 allowed. If stack alignment is needed, we can only replace argument
7432 pointer with hard frame pointer, or replace frame pointer with stack
7433 pointer. Otherwise, frame pointer elimination is automatically
7434 handled and all other eliminations are valid. */
7437 ix86_can_eliminate (int from, int to)
/* Under fp-based stack realignment only these two eliminations keep
   addresses computable.  */
7439 if (stack_realign_fp)
7440 return ((from == ARG_POINTER_REGNUM
7441 && to == HARD_FRAME_POINTER_REGNUM)
7442 || (from == FRAME_POINTER_REGNUM
7443 && to == STACK_POINTER_REGNUM))
/* Otherwise elimination to the stack pointer is blocked only when a
   frame pointer is required.  */
7445 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7448 /* Return the offset between two registers, one to be eliminated, and the other
7449 its replacement, at the start of a routine.  Derived entirely from the
   computed frame layout.  */
7452 ix86_initial_elimination_offset (int from, int to)
7454 struct ix86_frame frame;
7455 ix86_compute_frame_layout (&frame);
7457 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7458 return frame.hard_frame_pointer_offset;
7459 else if (from == FRAME_POINTER_REGNUM
7460 && to == HARD_FRAME_POINTER_REGNUM)
7461 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases must target the stack pointer.  */
7464 gcc_assert (to == STACK_POINTER_REGNUM);
7466 if (from == ARG_POINTER_REGNUM)
7467 return frame.stack_pointer_offset;
7469 gcc_assert (from == FRAME_POINTER_REGNUM);
7470 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7474 /* Fill structure ix86_frame about frame of currently computed function.
   Computes register-save counts, padding, red-zone usage and the
   offsets of the frame/hard-frame/stack pointers.  NOTE(review): some
   brace and conditional lines (e.g. the guard around the debug dump at
   the end) are elided in this extraction.  */
7477 ix86_compute_frame_layout (struct ix86_frame *frame)
7479 HOST_WIDE_INT total_size;
7480 unsigned int stack_alignment_needed;
7481 HOST_WIDE_INT offset;
7482 unsigned int preferred_alignment;
7483 HOST_WIDE_INT size = get_frame_size ();
7485 frame->nregs = ix86_nsaved_regs ();
7488 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7489 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* Sanity: a non-empty frame implies a known alignment, and the
   preferred boundary sits between STACK_BOUNDARY and the needed one.  */
7491 gcc_assert (!size || stack_alignment_needed);
7492 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7493 gcc_assert (preferred_alignment <= stack_alignment_needed);
7495 /* During reload iteration the amount of registers saved can change.
7496 Recompute the value as needed. Do not recompute when amount of registers
7497 didn't change as reload does multiple calls to the function and does not
7498 expect the decision to change within single iteration. */
7499 if (!optimize_function_for_size_p (cfun)
7500 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7502 int count = frame->nregs;
7504 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7505 /* The fast prologue uses move instead of push to save registers. This
7506 is significantly longer, but also executes faster as modern hardware
7507 can execute the moves in parallel, but can't do that for push/pop.
7509 Be careful about choosing what prologue to emit: When function takes
7510 many instructions to execute we may use slow version as well as in
7511 case function is known to be outside hot spot (this is known with
7512 feedback only). Weight the size of function by number of registers
7513 to save as it is cheap to use one or two push instructions but very
7514 slow to use many of them. */
7516 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7517 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7518 || (flag_branch_probabilities
7519 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7520 cfun->machine->use_fast_prologue_epilogue = false;
7522 cfun->machine->use_fast_prologue_epilogue
7523 = !expensive_function_p (count);
7525 if (TARGET_PROLOGUE_USING_MOVE
7526 && cfun->machine->use_fast_prologue_epilogue)
7527 frame->save_regs_using_mov = true;
7529 frame->save_regs_using_mov = false;
7532 /* Skip return address and saved base pointer. */
7533 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7535 frame->hard_frame_pointer_offset = offset;
7537 /* Set offset to aligned because the realigned frame starts from
7539 if (stack_realign_fp)
7540 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7542 /* Register save area */
7543 offset += frame->nregs * UNITS_PER_WORD;
/* Varargs save area (GPR + FPR register-save spill for va_arg).  */
7546 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7547 offset += frame->va_arg_size;
7549 /* Align start of frame for local function. */
7550 frame->padding1 = ((offset + stack_alignment_needed - 1)
7551 & -stack_alignment_needed) - offset;
7553 offset += frame->padding1;
7555 /* Frame pointer points here. */
7556 frame->frame_pointer_offset = offset;
7560 /* Add outgoing arguments area. Can be skipped if we eliminated
7561 all the function calls as dead code.
7562 Skipping is however impossible when function calls alloca. Alloca
7563 expander assumes that last crtl->outgoing_args_size
7564 of stack frame are unused. */
7565 if (ACCUMULATE_OUTGOING_ARGS
7566 && (!current_function_is_leaf || cfun->calls_alloca
7567 || ix86_current_function_calls_tls_descriptor))
7569 offset += crtl->outgoing_args_size;
7570 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7573 frame->outgoing_arguments_size = 0;
7575 /* Align stack boundary. Only needed if we're calling another function
7577 if (!current_function_is_leaf || cfun->calls_alloca
7578 || ix86_current_function_calls_tls_descriptor)
7579 frame->padding2 = ((offset + preferred_alignment - 1)
7580 & -preferred_alignment) - offset;
7582 frame->padding2 = 0;
7584 offset += frame->padding2;
7586 /* We've reached end of stack frame. */
7587 frame->stack_pointer_offset = offset;
7589 /* Size prologue needs to allocate. */
7590 frame->to_allocate =
7591 (size + frame->padding1 + frame->padding2
7592 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames save registers with pushes; huge 64-bit frames cannot
   use mov-based saves because of displacement limits.  */
7594 if ((!frame->to_allocate && frame->nregs <= 1)
7595 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7596 frame->save_regs_using_mov = false;
/* Red zone: on non-MS 64-bit targets a leaf function may use the 128
   bytes below the stack pointer without adjusting it.  */
7598 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7599 && current_function_is_leaf
7600 && !ix86_current_function_calls_tls_descriptor)
7602 frame->red_zone_size = frame->to_allocate;
7603 if (frame->save_regs_using_mov)
7604 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7605 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7606 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7609 frame->red_zone_size = 0;
7610 frame->to_allocate -= frame->red_zone_size;
7611 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard line elided in this
   extraction; presumably conditional on a dump flag).  */
7613 fprintf (stderr, "\n");
7614 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7615 fprintf (stderr, "size: %ld\n", (long)size);
7616 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7617 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7618 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7619 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7620 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7621 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7622 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7623 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7624 (long)frame->hard_frame_pointer_offset);
7625 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7626 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7627 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7628 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7632 /* Emit code to save registers in the prologue using push insns,
   highest register number first, marking each insn frame-related for
   unwind info.  */
7635 ix86_emit_save_regs (void)
7640 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7641 if (ix86_save_reg (regno, true))
7643 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7644 RTX_FRAME_RELATED_P (insn) = 1;
7648 /* Emit code to save registers using MOV insns. First register
7649 is restored from POINTER + OFFSET.  Registers are saved in ascending
   regno order at consecutive word slots.  */
7651 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7656 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7657 if (ix86_save_reg (regno, true))
7659 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7661 gen_rtx_REG (Pmode, regno));
/* Mark for unwind info, then advance to the next save slot.  */
7662 RTX_FRAME_RELATED_P (insn) = 1;
7663 offset += UNITS_PER_WORD;
7667 /* Expand prologue or epilogue stack adjustment.
7668 The pattern exist to put a dependency on all ebp-based memory accesses.
7669 STYLE should be negative if instructions should be marked as frame related,
7670 zero if %r11 register is live and cannot be freely used and positive
   otherwise (comment tail elided in this extraction).  */
7674 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
/* 32-bit path (guard line elided): a single add/sub pattern suffices.  */
7679 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit with an offset that fits a signed 32-bit immediate.  */
7680 else if (x86_64_immediate_operand (offset, DImode))
7681 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7685 /* r11 is used by indirect sibcall return as well, set before the
7686 epilogue and used after the epilogue. ATM indirect sibcall
7687 shouldn't be used together with huge frame sizes in one
7688 function because of the frame_size check in sibcall.c. */
/* Huge 64-bit offset: materialize it in %r11 first.  */
7690 r11 = gen_rtx_REG (DImode, R11_REG);
7691 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7693 RTX_FRAME_RELATED_P (insn) = 1;
7694 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* Negative STYLE requests frame-related marking (guard elided).  */
7698 RTX_FRAME_RELATED_P (insn) = 1;
7701 /* Find an available register to be used as dynamic realign argument
7702 pointer regsiter. Such a register will be written in prologue and
7703 used in begin of body, so it must not be
7704 1. parameter passing register.
7706 We reuse static-chain register if it is available. Otherwise, we
7707 use DI for i386 and R13 for x86-64. We chose R13 since it has
7710 Return: the regno of chosen register.
   NOTE(review): the TARGET_64BIT split and the return statements are
   elided in this extraction.  */
7713 find_drap_reg (void)
7715 tree decl = cfun->decl;
7719 /* Use R13 for nested function or function need static chain.
7720 Since function with tail call may use any caller-saved
7721 registers in epilogue, DRAP must not use caller-saved
7722 register in such case. */
7723 if ((decl_function_context (decl)
7724 && !DECL_NO_STATIC_CHAIN (decl))
7725 || crtl->tail_call_emit)
7732 /* Use DI for nested function or function need static chain.
7733 Since function with tail call may use any caller-saved
7734 registers in epilogue, DRAP must not use caller-saved
7735 register in such case. */
7736 if ((decl_function_context (decl)
7737 && !DECL_NO_STATIC_CHAIN (decl))
7738 || crtl->tail_call_emit)
7741 /* Reuse static chain register if it isn't used for parameter
   passing (regparm <= 2 and not fastcall).  */
7743 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7744 && !lookup_attribute ("fastcall",
7745 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7752 /* Update incoming stack boundary and estimated stack alignment. */
7755 ix86_update_stack_boundary (void)
7757 /* Prefer the one specified at command line. */
7758 ix86_incoming_stack_boundary
7759 = (ix86_user_incoming_stack_boundary
7760 ? ix86_user_incoming_stack_boundary
7761 : ix86_default_incoming_stack_boundary);
7763 /* Incoming stack alignment can be changed on individual functions
7764 via force_align_arg_pointer attribute. We use the smallest
7765 incoming stack boundary. */
7766 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7767 && lookup_attribute (ix86_force_align_arg_pointer_string,
7768 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7769 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7771 /* Stack at entrance of main is aligned by runtime. We use the
7772 smallest incoming stack boundary. */
7773 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7774 && DECL_NAME (current_function_decl)
7775 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7776 && DECL_FILE_SCOPE_P (current_function_decl))
7777 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7779 /* x86_64 vararg needs 16byte stack alignment for register save
   area (guard line elided in this extraction).  */
7783 && crtl->stack_alignment_estimated < 128)
7784 crtl->stack_alignment_estimated = 128;
7787 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7788 needed or an rtx for DRAP otherwise. */
7791 ix86_get_drap_rtx (void)
/* Force DRAP whenever requested or when outgoing args are pushed.  */
7793 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7794 crtl->need_drap = true;
7796 if (stack_realign_drap)
7798 /* Assign DRAP to vDRAP and returns vDRAP */
7799 unsigned int regno = find_drap_reg ();
7804 arg_ptr = gen_rtx_REG (Pmode, regno);
7805 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after function entry so the
   body can address arguments through the pseudo.  */
7808 drap_vreg = copy_to_reg (arg_ptr);
7812 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7813 RTX_FRAME_RELATED_P (insn) = 1;
7820 /* Handle the TARGET_INTERNAL_ARG_POINTER hook: incoming arguments are
   addressed off the virtual incoming-args pointer.  */
7823 ix86_internal_arg_pointer (void)
7825 return virtual_incoming_args_rtx;
7828 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7829 This is called from dwarf2out.c to emit call frame instructions
7830 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7832 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7834 rtx unspec = SET_SRC (pattern);
7835 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the UNSPEC code (switch header elided in this
   extraction).  */
7839 case UNSPEC_REG_SAVE:
/* Register saved into another register: record reg-save CFI.  */
7840 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7841 SET_DEST (pattern));
7843 case UNSPEC_DEF_CFA:
/* CFA redefined as dest-register + constant offset.  */
7844 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7845 INTVAL (XVECEXP (unspec, 0, 0)));
7852 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7853 to be generated in correct form. */
7855 ix86_finalize_stack_realign_flags (void)
7857 /* Check if stack realign is really needed after reload, and
7858 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter-area
   boundary and the target's incoming stack boundary.  */
7859 unsigned int incoming_stack_boundary
7860 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7861 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realignment is needed when the incoming boundary falls short of the
   alignment the function's stack slots actually require.  */
7862 unsigned int stack_realign = (incoming_stack_boundary
7863 < (current_function_is_leaf
7864 ? crtl->max_used_stack_slot_alignment
7865 : crtl->stack_alignment_needed));
7867 if (crtl->stack_realign_finalized)
7869 /* After stack_realign_needed is finalized, we can't no longer
   change it -- verify the decision is stable.  */
7871 gcc_assert (crtl->stack_realign_needed == stack_realign);
7875 crtl->stack_realign_needed = stack_realign;
7876 crtl->stack_realign_finalized = true;
7880 /* Expand the prologue into a bunch of separate insns. */
7883 ix86_expand_prologue (void)
7887 struct ix86_frame frame;
7888 HOST_WIDE_INT allocate;
7890 ix86_finalize_stack_realign_flags ();
7892 /* DRAP should not coexist with stack_realign_fp */
7893 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7895 ix86_compute_frame_layout (&frame);
7897 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7898 of DRAP is needed and stack realignment is really needed after reload */
7899 if (crtl->drap_reg && crtl->stack_realign_needed)
7902 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7903 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7904 ? 0 : UNITS_PER_WORD);
7906 gcc_assert (stack_realign_drap);
7908 /* Grab the argument pointer. */
7909 x = plus_constant (stack_pointer_rtx,
7910 (UNITS_PER_WORD + param_ptr_offset));
7913 /* Only need to push parameter pointer reg if it is caller
7915 if (!call_used_regs[REGNO (crtl->drap_reg)])
7917 /* Push arg pointer reg */
7918 insn = emit_insn (gen_push (y));
7919 RTX_FRAME_RELATED_P (insn) = 1;
7922 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7923 RTX_FRAME_RELATED_P (insn) = 1;
7925 /* Align the stack. */
7926 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7928 GEN_INT (-align_bytes)));
7929 RTX_FRAME_RELATED_P (insn) = 1;
7931 /* Replicate the return address on the stack so that return
7932 address can be reached via (argp - 1) slot. This is needed
7933 to implement macro RETURN_ADDR_RTX and intrinsic function
7934 expand_builtin_return_addr etc. */
7936 x = gen_frame_mem (Pmode,
7937 plus_constant (x, -UNITS_PER_WORD));
7938 insn = emit_insn (gen_push (x));
7939 RTX_FRAME_RELATED_P (insn) = 1;
7942 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7943 slower on all targets. Also sdb doesn't like it. */
7945 if (frame_pointer_needed)
7947 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7948 RTX_FRAME_RELATED_P (insn) = 1;
7950 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7951 RTX_FRAME_RELATED_P (insn) = 1;
7954 if (stack_realign_fp)
7956 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7957 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7959 /* Align the stack. */
7960 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7962 GEN_INT (-align_bytes)));
7963 RTX_FRAME_RELATED_P (insn) = 1;
7966 allocate = frame.to_allocate;
7968 if (!frame.save_regs_using_mov)
7969 ix86_emit_save_regs ();
7971 allocate += frame.nregs * UNITS_PER_WORD;
7973 /* When using red zone we may start register saving before allocating
7974 the stack frame saving one cycle of the prologue. However I will
7975 avoid doing this if I am going to have to probe the stack since
7976 at least on x86_64 the stack probe can turn into a call that clobbers
7977 a red zone location */
7978 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7979 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7980 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7981 && !crtl->stack_realign_needed)
7982 ? hard_frame_pointer_rtx
7983 : stack_pointer_rtx,
7984 -frame.nregs * UNITS_PER_WORD);
7988 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7989 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7990 GEN_INT (-allocate), -1);
7993 /* Only valid for Win32. */
7994 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7998 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8000 if (cfun->machine->call_abi == MS_ABI)
8003 eax_live = ix86_eax_live_at_start_p ();
8007 emit_insn (gen_push (eax));
8008 allocate -= UNITS_PER_WORD;
8011 emit_move_insn (eax, GEN_INT (allocate));
8014 insn = gen_allocate_stack_worker_64 (eax, eax);
8016 insn = gen_allocate_stack_worker_32 (eax, eax);
8017 insn = emit_insn (insn);
8018 RTX_FRAME_RELATED_P (insn) = 1;
8019 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8020 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8021 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8022 t, REG_NOTES (insn));
8026 if (frame_pointer_needed)
8027 t = plus_constant (hard_frame_pointer_rtx,
8030 - frame.nregs * UNITS_PER_WORD);
8032 t = plus_constant (stack_pointer_rtx, allocate);
8033 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8037 if (frame.save_regs_using_mov
8038 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8039 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8041 if (!frame_pointer_needed
8042 || !frame.to_allocate
8043 || crtl->stack_realign_needed)
8044 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8047 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8048 -frame.nregs * UNITS_PER_WORD);
8051 pic_reg_used = false;
8052 if (pic_offset_table_rtx
8053 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8056 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8058 if (alt_pic_reg_used != INVALID_REGNUM)
8059 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8061 pic_reg_used = true;
8068 if (ix86_cmodel == CM_LARGE_PIC)
8070 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8071 rtx label = gen_label_rtx ();
8073 LABEL_PRESERVE_P (label) = 1;
8074 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8075 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8076 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8077 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8078 pic_offset_table_rtx, tmp_reg));
8081 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8084 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8087 /* Prevent function calls from being scheduled before the call to mcount.
8088 In the pic_reg_used case, make sure that the got load isn't deleted. */
8092 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8093 emit_insn (gen_blockage ());
8096 if (crtl->drap_reg && !crtl->stack_realign_needed)
8098 /* vDRAP is setup but after reload it turns out stack realign
8099 isn't necessary, here we will emit prologue to setup DRAP
8100 without stack realign adjustment */
8101 int drap_bp_offset = UNITS_PER_WORD * 2;
8102 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8103 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8106 /* Emit cld instruction if stringops are used in the function. */
8107 if (TARGET_CLD && ix86_current_function_needs_cld)
8108 emit_insn (gen_cld ());
8111 /* Emit code to restore saved registers using MOV insns.  First register
8112    is restored from POINTER + OFFSET.  */
/* NOTE(review): MAYBE_EH_RETURN is forwarded to ix86_save_reg, so the set of
   registers restored here presumably includes the eh_return data registers
   when restoring along an eh_return path — confirm against ix86_save_reg.  */
8114 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8115 int maybe_eh_return)
8118 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8120 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8121 if (ix86_save_reg (regno, maybe_eh_return))
8123 /* Ensure that adjust_address won't be forced to produce pointer
8124 out of range allowed by x86-64 instruction set. */
8125 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8129 r11 = gen_rtx_REG (DImode, R11_REG);
8130 emit_move_insn (r11, GEN_INT (offset));
/* Rebase on r11 so the remaining displacement fits the 32-bit
   displacement field of the x86-64 addressing mode.  */
8131 emit_insn (gen_adddi3 (r11, r11, pointer));
8132 base_address = gen_rtx_MEM (Pmode, r11);
8135 emit_move_insn (gen_rtx_REG (Pmode, regno),
8136 adjust_address (base_address, Pmode, offset));
/* Each restored register occupies one word; advance to the next slot.  */
8137 offset += UNITS_PER_WORD;
8141 /* Restore function stack, frame, and registers. */
/* NOTE(review): STYLE selects the epilogue flavor.  From the visible uses,
   STYLE == 2 is the eh_return path (see the "style != 2" offset adjustment
   and the "style == 2" arguments to the restore helpers); confirm the other
   values (normal return vs. sibcall) against the callers of this function.  */
8144 ix86_expand_epilogue (int style)
8148 struct ix86_frame frame;
8149 HOST_WIDE_INT offset;
8151 ix86_finalize_stack_realign_flags ();
8153 /* When stack is realigned, SP must be valid. */
8154 sp_valid = (!frame_pointer_needed
8155 || current_function_sp_is_unchanging
8156 || stack_realign_fp);
8158 ix86_compute_frame_layout (&frame);
8160 /* Calculate start of saved registers relative to ebp. Special care
8161 must be taken for the normal return case of a function using
8162 eh_return: the eax and edx registers are marked as saved, but not
8163 restored along this path. */
8164 offset = frame.nregs;
8165 if (crtl->calls_eh_return && style != 2)
/* OFFSET is negative: saved registers live below the hard frame pointer.  */
8167 offset *= -UNITS_PER_WORD;
8169 /* If we're only restoring one register and sp is not valid then
8170 using a move instruction to restore the register since it's
8171 less work than reloading sp and popping the register.
8173 The default code result in stack adjustment using add/lea instruction,
8174 while this code results in LEAVE instruction (or discrete equivalent),
8175 so it is profitable in some other cases as well. Especially when there
8176 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8177 and there is exactly one register to pop. This heuristic may need some
8178 tuning in future. */
8179 if ((!sp_valid && frame.nregs <= 1)
8180 || (TARGET_EPILOGUE_USING_MOVE
8181 && cfun->machine->use_fast_prologue_epilogue
8182 && (frame.nregs > 1 || frame.to_allocate))
8183 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8184 || (frame_pointer_needed && TARGET_USE_LEAVE
8185 && cfun->machine->use_fast_prologue_epilogue
8186 && frame.nregs == 1)
8187 || crtl->calls_eh_return)
8189 /* Restore registers. We can use ebp or esp to address the memory
8190 locations. If both are available, default to ebp, since offsets
8191 are known to be small. Only exception is esp pointing directly
8192 to the end of block of saved registers, where we may simplify
8195 If we are realigning stack with bp and sp, regs restore can't
8196 be addressed by bp. sp must be used instead. */
8198 if (!frame_pointer_needed
8199 || (sp_valid && !frame.to_allocate)
8200 || stack_realign_fp)
8201 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8202 frame.to_allocate, style == 2)
8204 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8205 offset, style == 2);
8207 /* eh_return epilogues need %ecx added to the stack pointer. */
8210 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8212 /* Stack align doesn't work with eh_return. */
8213 gcc_assert (!crtl->stack_realign_needed);
8215 if (frame_pointer_needed)
/* Move SA (the eh stack-adjust register) past the saved frame pointer,
   then reload %ebp from its save slot before discarding the frame.  */
8217 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8218 tmp = plus_constant (tmp, UNITS_PER_WORD);
8219 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8221 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8222 emit_move_insn (hard_frame_pointer_rtx, tmp);
8224 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8229 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8230 tmp = plus_constant (tmp, (frame.to_allocate
8231 + frame.nregs * UNITS_PER_WORD));
8232 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8235 else if (!frame_pointer_needed)
8236 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8237 GEN_INT (frame.to_allocate
8238 + frame.nregs * UNITS_PER_WORD),
8240 /* If not an i386, mov & pop is faster than "leave". */
8241 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8242 || !cfun->machine->use_fast_prologue_epilogue)
8243 emit_insn ((*ix86_gen_leave) ());
8246 pro_epilogue_adjust_stack (stack_pointer_rtx,
8247 hard_frame_pointer_rtx,
8250 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8255 /* First step is to deallocate the stack frame so that we can
8258 If we realign stack with frame pointer, then stack pointer
8259 won't be able to recover via lea $offset(%bp), %sp, because
8260 there is a padding area between bp and sp for realign.
8261 "add $to_allocate, %sp" must be used instead. */
8264 gcc_assert (frame_pointer_needed);
8265 gcc_assert (!stack_realign_fp);
8266 pro_epilogue_adjust_stack (stack_pointer_rtx,
8267 hard_frame_pointer_rtx,
8268 GEN_INT (offset), style);
8270 else if (frame.to_allocate)
8271 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8272 GEN_INT (frame.to_allocate), style);
/* Pop-based restore path: registers come off the stack in regno order,
   mirroring how ix86_emit_save_regs pushed them in the prologue.  */
8274 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8275 if (ix86_save_reg (regno, false))
8276 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8277 if (frame_pointer_needed)
8279 /* Leave results in shorter dependency chains on CPUs that are
8280 able to grok it fast. */
8281 if (TARGET_USE_LEAVE)
8282 emit_insn ((*ix86_gen_leave) ());
8285 /* For stack realigned really happens, recover stack
8286 pointer to hard frame pointer is a must, if not using
8288 if (stack_realign_fp)
8289 pro_epilogue_adjust_stack (stack_pointer_rtx,
8290 hard_frame_pointer_rtx,
8292 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP (dynamic realign argument pointer) setup from the
   prologue: skip the replicated return address (and the saved DRAP
   register, if it was callee-saved and therefore pushed).  */
8297 if (crtl->drap_reg && crtl->stack_realign_needed)
8299 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8300 ? 0 : UNITS_PER_WORD);
8301 gcc_assert (stack_realign_drap);
8302 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8304 GEN_INT (-(UNITS_PER_WORD
8305 + param_ptr_offset))));
8306 if (!call_used_regs[REGNO (crtl->drap_reg)])
8307 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8311 /* Sibcall epilogues don't want a return instruction. */
8315 if (crtl->args.pops_args && crtl->args.size)
8317 rtx popc = GEN_INT (crtl->args.pops_args);
8319 /* i386 can only pop 64K bytes. If asked to pop more, pop
8320 return address, do explicit add, and jump indirectly to the
8323 if (crtl->args.pops_args >= 65536)
8325 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8327 /* There is no "pascal" calling convention in any 64bit ABI. */
8328 gcc_assert (!TARGET_64BIT);
8330 emit_insn (gen_popsi1 (ecx));
8331 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8332 emit_jump_insn (gen_return_indirect_internal (ecx));
8335 emit_jump_insn (gen_return_pop_internal (popc));
8338 emit_jump_insn (gen_return_internal ());
8341 /* Reset from the function's potential modifications. */
/* Runs after a function body is written out; restores the PIC register's
   hard regno, which the prologue may have rewritten to an alternate
   register via SET_REGNO.  FILE/SIZE are unused except on Mach-O.  */
8344 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8345 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8347 if (pic_offset_table_rtx)
8348 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8350 /* Mach-O doesn't support labels at the end of objects, so if
8351 it looks like we might want one, insert a NOP. */
/* Walk back over trailing notes to see whether the function ends in a
   (deleted) label; if so, pad with a nop so the label has an insn.  */
8353 rtx insn = get_last_insn ();
8356 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8357 insn = PREV_INSN (insn);
8361 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8362 fputs ("\tnop\n", file);
8368 /* Extract the parts of an RTL expression that is a valid memory address
8369 for an instruction. Return 0 if the structure of the address is
8370 grossly off. Return -1 if the address contains ASHIFT, so it is not
8371 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with the base/index/scale/displacement/segment decomposition
   matching the x86 [base + index*scale + disp] addressing form.  */
8374 ix86_decompose_address (rtx addr, struct ix86_address *out)
8376 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8377 rtx base_reg, index_reg;
8378 HOST_WIDE_INT scale = 1;
8379 rtx scale_rtx = NULL_RTX;
8381 enum ix86_address_seg seg = SEG_DEFAULT;
8383 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8385 else if (GET_CODE (addr) == PLUS)
/* Flatten a (possibly nested) PLUS chain into ADDENDS, then classify
   each addend as index*scale, segment unspec, base, or displacement.  */
8395 addends[n++] = XEXP (op, 1);
8398 while (GET_CODE (op) == PLUS);
8403 for (i = n; i >= 0; --i)
8406 switch (GET_CODE (op))
8411 index = XEXP (op, 0);
8412 scale_rtx = XEXP (op, 1);
/* A UNSPEC_TP addend selects the TLS segment register (%fs/%gs).  */
8416 if (XINT (op, 1) == UNSPEC_TP
8417 && TARGET_TLS_DIRECT_SEG_REFS
8418 && seg == SEG_DEFAULT)
8419 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8448 else if (GET_CODE (addr) == MULT)
8450 index = XEXP (addr, 0); /* index*scale */
8451 scale_rtx = XEXP (addr, 1);
8453 else if (GET_CODE (addr) == ASHIFT)
8457 /* We're called for lea too, which implements ashift on occasion. */
8458 index = XEXP (addr, 0);
8459 tmp = XEXP (addr, 1);
8460 if (!CONST_INT_P (tmp))
/* Shift counts 0..3 correspond to the encodable scales 1/2/4/8.  */
8462 scale = INTVAL (tmp);
8463 if ((unsigned HOST_WIDE_INT) scale > 3)
8469 disp = addr; /* displacement */
8471 /* Extract the integral value of scale. */
8474 if (!CONST_INT_P (scale_rtx))
8476 scale = INTVAL (scale_rtx);
/* Strip SUBREGs so the hard-register checks below see the real regs.  */
8479 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8480 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8482 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8483 if (base_reg && index_reg && scale == 1
8484 && (index_reg == arg_pointer_rtx
8485 || index_reg == frame_pointer_rtx
8486 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8489 tmp = base, base = index, index = tmp;
8490 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8493 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8494 if ((base_reg == hard_frame_pointer_rtx
8495 || base_reg == frame_pointer_rtx
8496 || base_reg == arg_pointer_rtx) && !disp
8499 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8500 Avoid this by transforming to [%esi+0].
8501 Reload calls address legitimization without cfun defined, so we need
8502 to test cfun for being non-NULL. */
8503 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8504 && base_reg && !index_reg && !disp
8506 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8509 /* Special case: encode reg+reg instead of reg*2. */
8510 if (!base && index && scale && scale == 2)
8511 base = index, base_reg = index_reg, scale = 1;
8513 /* Special case: scaling cannot be encoded without base or displacement. */
8514 if (!base && !disp && index && scale != 1)
8526 /* Return cost of the memory address x.
8527 For i386, it is better to use a complex address than let gcc copy
8528 the address into a reg and make a new pseudo. But not if the address
8529 requires to two regs - that would mean more pseudos with longer
/* SPEED is unused: the cost model here depends only on address shape.  */
8532 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8534 struct ix86_address parts;
8536 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the hard/pseudo register tests below apply
   to the underlying registers.  */
8540 if (parts.base && GET_CODE (parts.base) == SUBREG)
8541 parts.base = SUBREG_REG (parts.base);
8542 if (parts.index && GET_CODE (parts.index) == SUBREG)
8543 parts.index = SUBREG_REG (parts.index);
8545 /* Attempt to minimize number of registers in the address. */
8547 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8549 && (!REG_P (parts.index)
8550 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8554 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8556 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8557 && parts.base != parts.index)
8560 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8561 since it's predecode logic can't detect the length of instructions
8562 and it degenerates to vector decoded. Increase cost of such
8563 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8564 to split such addresses or even refuse such addresses at all.
8566 Following addressing modes are affected:
8571 The first and last case may be avoidable by explicitly coding the zero in
8572 memory address, but I don't have AMD-K6 machine handy to check this
8576 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8577 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8578 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8584 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8585 this is used for to form addresses to local data when -fPIC is in
/* Predicate: true iff DISP is the Mach-O pic-base-offset unspec.  */
8589 darwin_local_data_pic (rtx disp)
8591 return (GET_CODE (disp) == UNSPEC
8592 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8595 /* Determine if a given RTX is a valid constant. We already know this
8596 satisfies CONSTANT_P. */
8599 legitimate_constant_p (rtx x)
8601 switch (GET_CODE (x))
/* For CONST wrappers, drill down through an optional PLUS-with-
   CONST_INT offset before checking the inner symbolic expression.  */
8606 if (GET_CODE (x) == PLUS)
8608 if (!CONST_INT_P (XEXP (x, 1)))
8613 if (TARGET_MACHO && darwin_local_data_pic (x))
8616 /* Only some unspecs are valid as "constants". */
8617 if (GET_CODE (x) == UNSPEC)
8618 switch (XINT (x, 1))
8623 return TARGET_64BIT;
/* TLS unspecs are constant only for the matching access model.  */
8626 x = XVECEXP (x, 0, 0);
8627 return (GET_CODE (x) == SYMBOL_REF
8628 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8630 x = XVECEXP (x, 0, 0);
8631 return (GET_CODE (x) == SYMBOL_REF
8632 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8637 /* We must have drilled down to a symbol. */
8638 if (GET_CODE (x) == LABEL_REF)
8640 if (GET_CODE (x) != SYMBOL_REF)
8645 /* TLS symbols are never valid. */
8646 if (SYMBOL_REF_TLS_MODEL (x))
8649 /* DLLIMPORT symbols are never valid. */
8650 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8651 && SYMBOL_REF_DLLIMPORT_P (x))
8656 if (GET_MODE (x) == TImode
8657 && x != CONST0_RTX (TImode)
8663 if (x == CONST0_RTX (GET_MODE (x)))
8671 /* Otherwise we handle everything else in the move patterns. */
8675 /* Determine if it's legal to put X into the constant pool. This
8676 is not possible for the address of thread-local symbols, which
8677 is checked above. */
/* Returns nonzero when X must NOT be forced into the constant pool;
   anything legitimate_constant_p rejects is unsafe to pool.  */
8680 ix86_cannot_force_const_mem (rtx x)
8682 /* We can always put integral constants and vectors in memory. */
8683 switch (GET_CODE (x))
8693 return !legitimate_constant_p (x);
8696 /* Determine if a given RTX is a valid constant address. */
/* A constant address must be both CONSTANT_P and pass the strict
   (third argument nonzero) address legitimacy check in Pmode.  */
8699 constant_address_p (rtx x)
8701 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8704 /* Nonzero if the constant value X is a legitimate general operand
8705 when generating PIC code. It is given that flag_pic is on and
8706 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8709 legitimate_pic_operand_p (rtx x)
8713 switch (GET_CODE (x))
/* Strip an optional constant offset before inspecting the inner rtx.  */
8716 inner = XEXP (x, 0);
8717 if (GET_CODE (inner) == PLUS
8718 && CONST_INT_P (XEXP (inner, 1)))
8719 inner = XEXP (inner, 0);
8721 /* Only some unspecs are valid as "constants". */
8722 if (GET_CODE (inner) == UNSPEC)
8723 switch (XINT (inner, 1))
8728 return TARGET_64BIT;
8730 x = XVECEXP (inner, 0, 0);
8731 return (GET_CODE (x) == SYMBOL_REF
8732 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8733 case UNSPEC_MACHOPIC_OFFSET:
8734 return legitimate_pic_address_disp_p (x);
/* Symbolic operands fall back to the displacement validity check.  */
8742 return legitimate_pic_address_disp_p (x);
8749 /* Determine if a given CONST RTX is a valid memory displacement
/* Returns nonzero when DISP may appear as the displacement part of a
   PIC memory address.  */
8753 legitimate_pic_address_disp_p (rtx disp)
8757 /* In 64bit mode we can allow direct addresses of symbols and labels
8758 when they are not dynamic symbols. */
8761 rtx op0 = disp, op1;
8763 switch (GET_CODE (disp))
8769 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8771 op0 = XEXP (XEXP (disp, 0), 0);
8772 op1 = XEXP (XEXP (disp, 0), 1);
/* Offsets are restricted to +-16MB around the symbol.  */
8773 if (!CONST_INT_P (op1)
8774 || INTVAL (op1) >= 16*1024*1024
8775 || INTVAL (op1) < -16*1024*1024)
8777 if (GET_CODE (op0) == LABEL_REF)
8779 if (GET_CODE (op0) != SYMBOL_REF)
8784 /* TLS references should always be enclosed in UNSPEC. */
8785 if (SYMBOL_REF_TLS_MODEL (op0))
8787 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8788 && ix86_cmodel != CM_LARGE_PIC)
8796 if (GET_CODE (disp) != CONST)
8798 disp = XEXP (disp, 0);
/* 64-bit path: only the GOT-related unspecs wrapping a symbol or
   label are acceptable displacements.  */
8802 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8803 of GOT tables. We should not need these anyway. */
8804 if (GET_CODE (disp) != UNSPEC
8805 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8806 && XINT (disp, 1) != UNSPEC_GOTOFF
8807 && XINT (disp, 1) != UNSPEC_PLTOFF))
8810 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8811 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8817 if (GET_CODE (disp) == PLUS)
8819 if (!CONST_INT_P (XEXP (disp, 1)))
8821 disp = XEXP (disp, 0);
8825 if (TARGET_MACHO && darwin_local_data_pic (disp))
8828 if (GET_CODE (disp) != UNSPEC)
8831 switch (XINT (disp, 1))
8836 /* We need to check for both symbols and labels because VxWorks loads
8837 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8839 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8840 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8842 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8843 While ABI specify also 32bit relocation but we don't produce it in
8844 small PIC model at all. */
8845 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8846 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8848 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: each is valid only for a symbol using the matching
   TLS access model.  */
8850 case UNSPEC_GOTTPOFF:
8851 case UNSPEC_GOTNTPOFF:
8852 case UNSPEC_INDNTPOFF:
8855 disp = XVECEXP (disp, 0, 0);
8856 return (GET_CODE (disp) == SYMBOL_REF
8857 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8859 disp = XVECEXP (disp, 0, 0);
8860 return (GET_CODE (disp) == SYMBOL_REF
8861 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8863 disp = XVECEXP (disp, 0, 0);
8864 return (GET_CODE (disp) == SYMBOL_REF
8865 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8871 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8872 memory address for an instruction. The MODE argument is the machine mode
8873 for the MEM expression that wants to use this address.
8875 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8876 convert common non-canonical forms to canonical form so that they will
/* STRICT nonzero means hard-register constraints apply (post-reload);
   REASON/REASON_RTX record why an address was rejected, presumably for
   debug output on a rejection path not visible here — confirm.  */
8880 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8881 rtx addr, int strict)
8883 struct ix86_address parts;
8884 rtx base, index, disp;
8885 HOST_WIDE_INT scale;
8886 const char *reason = NULL;
8887 rtx reason_rtx = NULL_RTX;
8889 if (ix86_decompose_address (addr, &parts) <= 0)
8891 reason = "decomposition failed";
8896 index = parts.index;
8898 scale = parts.scale;
8900 /* Validate base register.
8902 Don't allow SUBREG's that span more than a word here. It can lead to spill
8903 failures when the base is one word out of a two word structure, which is
8904 represented internally as a DImode int. */
8913 else if (GET_CODE (base) == SUBREG
8914 && REG_P (SUBREG_REG (base))
8915 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8917 reg = SUBREG_REG (base);
8920 reason = "base is not a register";
8924 if (GET_MODE (base) != Pmode)
8926 reason = "base is not in Pmode";
8930 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8931 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8933 reason = "base is not valid";
8938 /* Validate index register.
8940 Don't allow SUBREG's that span more than a word here -- same as above. */
8949 else if (GET_CODE (index) == SUBREG
8950 && REG_P (SUBREG_REG (index))
8951 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8953 reg = SUBREG_REG (index);
8956 reason = "index is not a register";
8960 if (GET_MODE (index) != Pmode)
8962 reason = "index is not in Pmode";
8966 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8967 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8969 reason = "index is not valid";
8974 /* Validate scale factor. */
8977 reason_rtx = GEN_INT (scale);
8980 reason = "scale without index";
/* The SIB byte only encodes scales of 1, 2, 4 and 8.  */
8984 if (scale != 2 && scale != 4 && scale != 8)
8986 reason = "scale is not a valid multiplier";
8991 /* Validate displacement. */
8996 if (GET_CODE (disp) == CONST
8997 && GET_CODE (XEXP (disp, 0)) == UNSPEC
8998 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
8999 switch (XINT (XEXP (disp, 0), 1))
9001 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9002 used. While ABI specify also 32bit relocations, we don't produce
9003 them at all and use IP relative instead. */
9006 gcc_assert (flag_pic);
9008 goto is_legitimate_pic;
9009 reason = "64bit address unspec";
9012 case UNSPEC_GOTPCREL:
9013 gcc_assert (flag_pic);
9014 goto is_legitimate_pic;
/* The TLS unspecs are accepted as-is as displacements.  */
9016 case UNSPEC_GOTTPOFF:
9017 case UNSPEC_GOTNTPOFF:
9018 case UNSPEC_INDNTPOFF:
9024 reason = "invalid address unspec";
9028 else if (SYMBOLIC_CONST (disp)
9032 && MACHOPIC_INDIRECT
9033 && !machopic_operand_p (disp)
9039 if (TARGET_64BIT && (index || base))
9041 /* foo@dtpoff(%rX) is ok. */
9042 if (GET_CODE (disp) != CONST
9043 || GET_CODE (XEXP (disp, 0)) != PLUS
9044 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9045 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9046 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9047 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9049 reason = "non-constant pic memory reference";
9053 else if (! legitimate_pic_address_disp_p (disp))
9055 reason = "displacement is an invalid pic construct";
9059 /* This code used to verify that a symbolic pic displacement
9060 includes the pic_offset_table_rtx register.
9062 While this is good idea, unfortunately these constructs may
9063 be created by "adds using lea" optimization for incorrect
9072 This code is nonsensical, but results in addressing
9073 GOT table with pic_offset_table_rtx base. We can't
9074 just refuse it easily, since it gets matched by
9075 "addsi3" pattern, that later gets split to lea in the
9076 case output register differs from input. While this
9077 can be handled by separate addsi pattern for this case
9078 that never results in lea, this seems to be easier and
9079 correct fix for crash to disable this test. */
9081 else if (GET_CODE (disp) != LABEL_REF
9082 && !CONST_INT_P (disp)
9083 && (GET_CODE (disp) != CONST
9084 || !legitimate_constant_p (disp))
9085 && (GET_CODE (disp) != SYMBOL_REF
9086 || !legitimate_constant_p (disp)))
9088 reason = "displacement is not constant";
/* x86-64 displacements must fit in a sign-extended 32-bit immediate.  */
9091 else if (TARGET_64BIT
9092 && !x86_64_immediate_operand (disp, VOIDmode))
9094 reason = "displacement is out of range";
9099 /* Everything looks valid. */
9106 /* Return a unique alias set for the GOT. */
/* Lazily allocates the set on first call; -1 marks "not yet created".  */
9108 static alias_set_type
9109 ix86_GOT_alias_set (void)
9111 static alias_set_type set = -1;
9113 set = new_alias_set ();
9117 /* Return a legitimate reference for ORIG (an address) using the
9118 register REG. If REG is 0, a new pseudo is generated.
9120 There are two types of references that must be handled:
9122 1. Global data references must load the address from the GOT, via
9123 the PIC reg. An insn is emitted to do this load, and the reg is
9126 2. Static data references, constant pool addresses, and code labels
9127 compute the address as an offset from the GOT, whose base is in
9128 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9129 differentiate them from global data objects. The returned
9130 address is the PIC reg + an unspec constant.
9132 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9133 reg also appears in the address. */
9136 legitimize_pic_address (rtx orig, rtx reg)
9143 if (TARGET_MACHO && !TARGET_64BIT)
9146 reg = gen_reg_rtx (Pmode);
9147 /* Use the generic Mach-O PIC machinery. */
9148 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9152 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9154 else if (TARGET_64BIT
9155 && ix86_cmodel != CM_SMALL_PIC
9156 && gotoff_operand (addr, Pmode))
9159 /* This symbol may be referenced via a displacement from the PIC
9160 base address (@GOTOFF). */
/* During reload the PIC register must be marked live explicitly,
   since the df machinery won't see this new use.  */
9162 if (reload_in_progress)
9163 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9164 if (GET_CODE (addr) == CONST)
9165 addr = XEXP (addr, 0);
9166 if (GET_CODE (addr) == PLUS)
9168 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9170 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9173 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9174 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9176 tmpreg = gen_reg_rtx (Pmode);
9179 emit_move_insn (tmpreg, new_rtx);
9183 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9184 tmpreg, 1, OPTAB_DIRECT);
9187 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9189 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9191 /* This symbol may be referenced via a displacement from the PIC
9192 base address (@GOTOFF). */
9194 if (reload_in_progress)
9195 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9196 if (GET_CODE (addr) == CONST)
9197 addr = XEXP (addr, 0);
9198 if (GET_CODE (addr) == PLUS)
9200 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9202 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9206 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9207 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9211 emit_move_insn (reg, new_rtx);
9215 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9216 /* We can't use @GOTOFF for text labels on VxWorks;
9217 see gotoff_operand. */
9218 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
9220 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* dllimport symbols (bare or with constant offset) are resolved
   through their import stub instead of the GOT.  */
9222 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9223 return legitimize_dllimport_symbol (addr, true);
9224 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9225 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9226 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9228 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9229 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9233 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small/medium PIC: RIP-relative GOT load (@GOTPCREL).  */
9235 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9236 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9237 new_rtx = gen_const_mem (Pmode, new_rtx);
9238 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9241 reg = gen_reg_rtx (Pmode);
9242 /* Use directly gen_movsi, otherwise the address is loaded
9243 into register for CSE. We don't want to CSE this addresses,
9244 instead we CSE addresses from the GOT table, so skip this. */
9245 emit_insn (gen_movsi (reg, new_rtx));
9250 /* This symbol must be referenced via a load from the
9251 Global Offset Table (@GOT). */
9253 if (reload_in_progress)
9254 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9255 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9256 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9258 new_rtx = force_reg (Pmode, new_rtx);
9259 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9260 new_rtx = gen_const_mem (Pmode, new_rtx);
9261 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9264 reg = gen_reg_rtx (Pmode);
9265 emit_move_insn (reg, new_rtx);
9271 if (CONST_INT_P (addr)
9272 && !x86_64_immediate_operand (addr, VOIDmode))
9276 emit_move_insn (reg, addr);
9280 new_rtx = force_reg (Pmode, addr);
9282 else if (GET_CODE (addr) == CONST)
9284 addr = XEXP (addr, 0);
9286 /* We must match stuff we generate before. Assume the only
9287 unspecs that can get here are ours. Not that we could do
9288 anything with them anyway.... */
9289 if (GET_CODE (addr) == UNSPEC
9290 || (GET_CODE (addr) == PLUS
9291 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9293 gcc_assert (GET_CODE (addr) == PLUS);
9295 if (GET_CODE (addr) == PLUS)
9297 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9299 /* Check first to see if this is a constant offset from a @GOTOFF
9300 symbol reference. */
9301 if (gotoff_operand (op0, Pmode)
9302 && CONST_INT_P (op1))
9306 if (reload_in_progress)
9307 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9308 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9310 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9311 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9312 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9316 emit_move_insn (reg, new_rtx);
/* Offsets outside +-16MB cannot ride along with @GOTOFF; force
   them into registers and add at runtime.  */
9322 if (INTVAL (op1) < -16*1024*1024
9323 || INTVAL (op1) >= 16*1024*1024)
9325 if (!x86_64_immediate_operand (op1, Pmode))
9326 op1 = force_reg (Pmode, op1);
9327 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively, then fold a
   constant part back in or re-associate it to the outside.  */
9333 base = legitimize_pic_address (XEXP (addr, 0), reg);
9334 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9335 base == reg ? NULL_RTX : reg);
9337 if (CONST_INT_P (new_rtx))
9338 new_rtx = plus_constant (base, INTVAL (new_rtx));
9341 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9343 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9344 new_rtx = XEXP (new_rtx, 1);
9346 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9354 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9357 get_thread_pointer (int to_reg)
/* The thread pointer is modeled as (unspec [(const_int 0)] UNSPEC_TP) and
   copied into a fresh pseudo.  NOTE(review): the TO_REG == 0 path and the
   return statements are not visible in this excerpt — confirm against the
   full function.  */
9361 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9365 reg = gen_reg_rtx (Pmode);
9366 insn = gen_rtx_SET (VOIDmode, reg, tp);
9367 insn = emit_insn (insn);
9372 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9373 false if we expect this to be used for a memory address and true if
9374 we expect to load the address into a register. */
9377 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9379 rtx dest, base, off, pic, tp;
/* Dispatch on the TLS access model of X.  NOTE(review): the switch header
   and several break/return statements are elided in this excerpt.  */
/* Global dynamic: obtain the symbol's address at run time via the
   tls_global_dynamic_{32,64} call patterns (__tls_get_addr).  */
9384 case TLS_MODEL_GLOBAL_DYNAMIC:
9385 dest = gen_reg_rtx (Pmode);
9386 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9388 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9390 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9393 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9394 insns = get_insns ();
/* The call has no side effects beyond its return value, so it may be
   CSEd/moved as a const call.  */
9397 RTL_CONST_CALL_P (insns) = 1;
9398 emit_libcall_block (insns, dest, rax, x);
9400 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9401 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9403 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9405 if (TARGET_GNU2_TLS)
9407 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9409 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local dynamic: one call computes the module base; each symbol is then
   a constant @DTPOFF offset from that base.  */
9413 case TLS_MODEL_LOCAL_DYNAMIC:
9414 base = gen_reg_rtx (Pmode);
9415 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9417 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9419 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9422 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9423 insns = get_insns ();
9426 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9427 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9428 RTL_CONST_CALL_P (insns) = 1;
9429 emit_libcall_block (insns, base, rax, note);
9431 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9432 emit_insn (gen_tls_local_dynamic_base_64 (base));
9434 emit_insn (gen_tls_local_dynamic_base_32 (base));
9436 if (TARGET_GNU2_TLS)
9438 rtx x = ix86_tls_module_base ();
9440 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9441 gen_rtx_MINUS (Pmode, x, tp));
9444 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9445 off = gen_rtx_CONST (Pmode, off);
9447 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9449 if (TARGET_GNU2_TLS)
9451 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9453 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial exec: load the thread-pointer offset from the GOT, using
   @GOTTPOFF, @GOTNTPOFF or @INDNTPOFF depending on the target.  */
9458 case TLS_MODEL_INITIAL_EXEC:
9462 type = UNSPEC_GOTNTPOFF;
9466 if (reload_in_progress)
9467 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9468 pic = pic_offset_table_rtx;
9469 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9471 else if (!TARGET_ANY_GNU_TLS)
9473 pic = gen_reg_rtx (Pmode);
9474 emit_insn (gen_set_got (pic));
9475 type = UNSPEC_GOTTPOFF;
9480 type = UNSPEC_INDNTPOFF;
9483 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9484 off = gen_rtx_CONST (Pmode, off);
9486 off = gen_rtx_PLUS (Pmode, pic, off);
9487 off = gen_const_mem (Pmode, off);
9488 set_mem_alias_set (off, ix86_GOT_alias_set ());
9490 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9492 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9493 off = force_reg (Pmode, off);
9494 return gen_rtx_PLUS (Pmode, base, off);
9498 base = get_thread_pointer (true);
9499 dest = gen_reg_rtx (Pmode);
/* Non-GNU TLS: address is TP minus the (positive) offset.  */
9500 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: the offset from the thread pointer is a link-time
   constant (@NTPOFF / @TPOFF).  */
9504 case TLS_MODEL_LOCAL_EXEC:
9505 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9506 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9507 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9508 off = gen_rtx_CONST (Pmode, off);
9510 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9512 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9513 return gen_rtx_PLUS (Pmode, base, off);
9517 base = get_thread_pointer (true);
9518 dest = gen_reg_rtx (Pmode);
9519 emit_insn (gen_subsi3 (dest, base, off));
9530 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Decl -> artificial __imp_ VAR_DECL cache, garbage-collected via GTY.  */
9533 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9534 htab_t dllimport_map;
9537 get_dllimport_decl (tree decl)
9539 struct tree_map *h, in;
9543 size_t namelen, prefixlen;
/* Create the hash table lazily on first use.  */
9549 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9551 in.hash = htab_hash_pointer (decl);
9552 in.base.from = decl;
9553 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9554 h = (struct tree_map *) *loc;
/* Cache miss: build a fresh read-only external VAR_DECL holding the
   pointer that the import table slot contains.  */
9558 *loc = h = GGC_NEW (struct tree_map);
9560 h->base.from = decl;
9561 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9562 DECL_ARTIFICIAL (to) = 1;
9563 DECL_IGNORED_P (to) = 1;
9564 DECL_EXTERNAL (to) = 1;
9565 TREE_READONLY (to) = 1;
9567 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9568 name = targetm.strip_name_encoding (name);
/* Fastcall symbols (and targets with an empty user label prefix) get
   plain "__imp_"; otherwise an extra leading underscore is added.  */
9569 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9570 ? "*__imp_" : "*__imp__";
9571 namelen = strlen (name);
9572 prefixlen = strlen (prefix);
9573 imp_name = (char *) alloca (namelen + prefixlen + 1);
9574 memcpy (imp_name, prefix, prefixlen);
9575 memcpy (imp_name + prefixlen, name, namelen + 1);
9577 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9578 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9579 SET_SYMBOL_REF_DECL (rtl, to);
/* The __imp_ symbol itself binds locally, so no further PIC/dllimport
   processing is applied to it.  */
9580 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9582 rtl = gen_const_mem (Pmode, rtl);
9583 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9585 SET_DECL_RTL (to, rtl);
9586 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9591 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9592 true if we require the result be a register. */
9595 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9600 gcc_assert (SYMBOL_REF_DECL (symbol));
9601 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* DECL_RTL of the import decl is a (mem (symbol_ref "__imp_...")) built
   by get_dllimport_decl; optionally copy it into a register.  */
9603 x = DECL_RTL (imp_decl);
9605 x = force_reg (Pmode, x);
9609 /* Try machine-dependent ways of modifying an illegitimate address
9610 to be legitimate. If we find one, return the new, valid address.
9611 This macro is used in only one place: `memory_address' in explow.c.
9613 OLDX is the address as it was before break_out_memory_refs was called.
9614 In some cases it is useful to look at this to decide what needs to be done.
9616 MODE and WIN are passed so that this macro can use
9617 GO_IF_LEGITIMATE_ADDRESS.
9619 It is always safe for this macro to do nothing. It exists to recognize
9620 opportunities to optimize the output.
9622 For the 80386, we handle X+REG by loading X into a register R and
9623 using R+REG. R will go in a general reg and indexing will be used.
9624 However, if REG is a broken-out memory address or multiplication,
9625 nothing needs to be done because REG can certainly go in a general reg.
9627 When -fpic is used, special handling is needed for symbolic references.
9628 See comments by legitimize_pic_address in i386.c for details. */
9631 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols (bare, or symbol+offset wrapped in CONST) go through the
   TLS legitimizer first.  */
9636 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9638 return legitimize_tls_address (x, (enum tls_model) log, false);
9639 if (GET_CODE (x) == CONST
9640 && GET_CODE (XEXP (x, 0)) == PLUS
9641 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9642 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9644 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9645 (enum tls_model) log, false);
9646 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Likewise dllimport references (bare, or symbol+offset).  */
9649 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9651 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9652 return legitimize_dllimport_symbol (x, true);
9653 if (GET_CODE (x) == CONST
9654 && GET_CODE (XEXP (x, 0)) == PLUS
9655 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9656 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9658 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9659 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9663 if (flag_pic && SYMBOLIC_CONST (x))
9664 return legitimize_pic_address (x, 0);
9666 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9667 if (GET_CODE (x) == ASHIFT
9668 && CONST_INT_P (XEXP (x, 1))
9669 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9672 log = INTVAL (XEXP (x, 1));
9673 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9674 GEN_INT (1 << log));
9677 if (GET_CODE (x) == PLUS)
9679 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9681 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9682 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9683 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9686 log = INTVAL (XEXP (XEXP (x, 0), 1));
9687 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9688 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9689 GEN_INT (1 << log));
9692 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9693 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9694 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9697 log = INTVAL (XEXP (XEXP (x, 1), 1));
9698 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9699 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9700 GEN_INT (1 << log));
9703 /* Put multiply first if it isn't already. */
9704 if (GET_CODE (XEXP (x, 1)) == MULT)
9706 rtx tmp = XEXP (x, 0);
9707 XEXP (x, 0) = XEXP (x, 1);
9712 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9713 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9714 created by virtual register instantiation, register elimination, and
9715 similar optimizations. */
9716 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9719 x = gen_rtx_PLUS (Pmode,
9720 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9721 XEXP (XEXP (x, 1), 0)),
9722 XEXP (XEXP (x, 1), 1));
9726 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9727 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9728 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9729 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9730 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9731 && CONSTANT_P (XEXP (x, 1)))
9734 rtx other = NULL_RTX;
9736 if (CONST_INT_P (XEXP (x, 1)))
9738 constant = XEXP (x, 1);
9739 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9741 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9743 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9744 other = XEXP (x, 1);
9752 x = gen_rtx_PLUS (Pmode,
9753 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9754 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9755 plus_constant (other, INTVAL (constant)));
/* If the canonicalizations above already produced a legitimate
   address, use it as-is.  */
9759 if (changed && legitimate_address_p (mode, x, FALSE))
9762 if (GET_CODE (XEXP (x, 0)) == MULT)
9765 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9768 if (GET_CODE (XEXP (x, 1)) == MULT)
9771 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9775 && REG_P (XEXP (x, 1))
9776 && REG_P (XEXP (x, 0)))
9779 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9782 x = legitimize_pic_address (x, 0);
9785 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half of the PLUS into a fresh
   pseudo, keeping the register half as base/index.  */
9788 if (REG_P (XEXP (x, 0)))
9790 rtx temp = gen_reg_rtx (Pmode);
9791 rtx val = force_operand (XEXP (x, 1), temp);
9793 emit_move_insn (temp, val);
9799 else if (REG_P (XEXP (x, 1)))
9801 rtx temp = gen_reg_rtx (Pmode);
9802 rtx val = force_operand (XEXP (x, 0), temp);
9804 emit_move_insn (temp, val);
9814 /* Print an integer constant expression in assembler syntax. Addition
9815 and subtraction are the only arithmetic that may appear in these
9816 expressions. FILE is the stdio stream to write to, X is the rtx, and
9817 CODE is the operand print code from the output string. */
9820 output_pic_addr_const (FILE *file, rtx x, int code)
9824 switch (GET_CODE (x))
9827 gcc_assert (flag_pic);
9832 if (! TARGET_MACHO || TARGET_64BIT)
9833 output_addr_const (file, x);
9836 const char *name = XSTR (x, 0);
9838 /* Mark the decl as referenced so that cgraph will
9839 output the function. */
9840 if (SYMBOL_REF_DECL (x))
9841 mark_decl_referenced (SYMBOL_REF_DECL (x));
9844 if (MACHOPIC_INDIRECT
9845 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9846 name = machopic_indirection_name (x, /*stub_p=*/true);
9848 assemble_name (file, name);
9850 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9851 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9852 fputs ("@PLT", file);
9859 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9860 assemble_name (asm_out_file, buf);
9864 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9868 /* This used to output parentheses around the expression,
9869 but that does not work on the 386 (either ATT or BSD assembler). */
9870 output_pic_addr_const (file, XEXP (x, 0), code);
9874 if (GET_MODE (x) == VOIDmode)
9876 /* We can use %d if the number is <32 bits and positive. */
/* NOTE(review): "0x%lx%08lx" truncates if HOST_WIDE_INT is wider than
   long on the host — confirm against this file's host assumptions.  */
9877 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9878 fprintf (file, "0x%lx%08lx",
9879 (unsigned long) CONST_DOUBLE_HIGH (x),
9880 (unsigned long) CONST_DOUBLE_LOW (x));
9882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9885 /* We can't handle floating point constants;
9886 PRINT_OPERAND must handle them. */
9887 output_operand_lossage ("floating constant misused");
9891 /* Some assemblers need integer constants to appear first. */
9892 if (CONST_INT_P (XEXP (x, 0)))
9894 output_pic_addr_const (file, XEXP (x, 0), code);
9896 output_pic_addr_const (file, XEXP (x, 1), code);
9900 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9901 output_pic_addr_const (file, XEXP (x, 1), code);
9903 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS is bracketed — parentheses in Intel syntax, brackets in AT&T.  */
9909 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9910 output_pic_addr_const (file, XEXP (x, 0), code);
9912 output_pic_addr_const (file, XEXP (x, 1), code);
9914 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC wrappers map to the corresponding relocation suffix.  */
9918 gcc_assert (XVECLEN (x, 0) == 1);
9919 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9920 switch (XINT (x, 1))
9923 fputs ("@GOT", file);
9926 fputs ("@GOTOFF", file);
9929 fputs ("@PLTOFF", file);
9931 case UNSPEC_GOTPCREL:
9932 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9933 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9935 case UNSPEC_GOTTPOFF:
9936 /* FIXME: This might be @TPOFF in Sun ld too. */
9937 fputs ("@GOTTPOFF", file);
9940 fputs ("@TPOFF", file);
9944 fputs ("@TPOFF", file);
9946 fputs ("@NTPOFF", file);
9949 fputs ("@DTPOFF", file);
9951 case UNSPEC_GOTNTPOFF:
9953 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9954 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9956 fputs ("@GOTNTPOFF", file);
9958 case UNSPEC_INDNTPOFF:
9959 fputs ("@INDNTPOFF", file);
9962 case UNSPEC_MACHOPIC_OFFSET:
9964 machopic_output_function_base_name (file);
9968 output_operand_lossage ("invalid UNSPEC as operand");
9974 output_operand_lossage ("invalid expression as operand");
9978 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9979 We need to emit DTP-relative relocations. */
9981 static void ATTRIBUTE_UNUSED
9982 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
9984 fputs (ASM_LONG, file);
9985 output_addr_const (file, x);
9986 fputs ("@DTPOFF", file);
/* NOTE(review): the switch on SIZE is elided here; the ", 0" appears to
   pad the emitted value to the wider size — confirm in the full file.  */
9992 fputs (", 0", file);
9999 /* Return true if X is a representation of the PIC register. This copes
10000 with calls from ix86_find_base_term, where the register might have
10001 been replaced by a cselib value. */
10004 ix86_pic_register_p (rtx x)
/* cselib may have substituted a VALUE for the register; compare it
   against pic_offset_table_rtx using cselib equality.  */
10006 if (GET_CODE (x) == VALUE)
10007 return (pic_offset_table_rtx
10008 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10010 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10013 /* In the name of slightly smaller debug output, and to cater to
10014 general assembler lossage, recognize PIC+GOTOFF and turn it back
10015 into a direct symbol reference.
10017 On Darwin, this is necessary to avoid a crash, because Darwin
10018 has a different PIC label for each routine but the DWARF debugging
10019 information is not associated with any particular routine, so it's
10020 necessary to remove references to the PIC label from RTL stored by
10021 the DWARF output code. */
10024 ix86_delegitimize_address (rtx orig_x)
10027 /* reg_addend is NULL or a multiple of some register. */
10028 rtx reg_addend = NULL_RTX;
10029 /* const_addend is NULL or a const_int. */
10030 rtx const_addend = NULL_RTX;
10031 /* This is the result, or NULL. */
10032 rtx result = NULL_RTX;
10039 if (GET_CODE (x) != CONST
10040 || GET_CODE (XEXP (x, 0)) != UNSPEC
10041 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10042 || !MEM_P (orig_x))
/* Direct @GOTPCREL load: the real symbol is operand 0 of the UNSPEC.  */
10044 return XVECEXP (XEXP (x, 0), 0, 0);
10047 if (GET_CODE (x) != PLUS
10048 || GET_CODE (XEXP (x, 1)) != CONST)
10051 if (ix86_pic_register_p (XEXP (x, 0)))
10052 /* %ebx + GOT/GOTOFF */
10054 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10056 /* %ebx + %reg * scale + GOT/GOTOFF */
10057 reg_addend = XEXP (x, 0);
10058 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10059 reg_addend = XEXP (reg_addend, 1);
10060 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10061 reg_addend = XEXP (reg_addend, 0);
10064 if (!REG_P (reg_addend)
10065 && GET_CODE (reg_addend) != MULT
10066 && GET_CODE (reg_addend) != ASHIFT)
/* Peel an optional integer offset off the CONST before inspecting the
   UNSPEC inside it.  */
10072 x = XEXP (XEXP (x, 1), 0);
10073 if (GET_CODE (x) == PLUS
10074 && CONST_INT_P (XEXP (x, 1)))
10076 const_addend = XEXP (x, 1);
10080 if (GET_CODE (x) == UNSPEC
10081 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10082 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10083 result = XVECEXP (x, 0, 0);
10085 if (TARGET_MACHO && darwin_local_data_pic (x)
10086 && !MEM_P (orig_x))
10087 result = XVECEXP (x, 0, 0);
/* Reattach the stripped-off constant and register addends.  */
10093 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10095 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10099 /* If X is a machine specific address (i.e. a symbol or label being
10100 referenced as a displacement from the GOT implemented using an
10101 UNSPEC), then return the base term. Otherwise return X. */
10104 ix86_find_base_term (rtx x)
/* Only (const (unspec ... GOTPCREL)) possibly wrapped in a +constant is
   stripped here; everything else is handed to ix86_delegitimize_address.  */
10110 if (GET_CODE (x) != CONST)
10112 term = XEXP (x, 0);
10113 if (GET_CODE (term) == PLUS
10114 && (CONST_INT_P (XEXP (term, 1))
10115 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10116 term = XEXP (term, 0);
10117 if (GET_CODE (term) != UNSPEC
10118 || XINT (term, 1) != UNSPEC_GOTPCREL)
10121 return XVECEXP (term, 0, 0);
10124 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "a", "ae", "p") for CODE in MODE
   to FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spellings.  NOTE(review): many case labels of the switch are elided
   in this excerpt.  */
10128 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10129 int fp, FILE *file)
10131 const char *suffix;
/* FP compares are first mapped onto the integer condition codes that the
   fcomi/fucomi flag layout produces.  */
10133 if (mode == CCFPmode || mode == CCFPUmode)
10135 enum rtx_code second_code, bypass_code;
10136 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10137 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10138 code = ix86_fp_compare_code_to_integer (code);
10142 code = reverse_condition (code);
10193 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10197 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10198 Those same assemblers have the same but opposite lossage on cmov. */
10199 if (mode == CCmode)
10200 suffix = fp ? "nbe" : "a";
10201 else if (mode == CCCmode)
10204 gcc_unreachable ();
10220 gcc_unreachable ();
10224 gcc_assert (mode == CCmode || mode == CCCmode);
10241 gcc_unreachable ();
10245 /* ??? As above. */
10246 gcc_assert (mode == CCmode || mode == CCCmode);
10247 suffix = fp ? "nb" : "ae";
10250 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10254 /* ??? As above. */
10255 if (mode == CCmode)
10257 else if (mode == CCCmode)
10258 suffix = fp ? "nb" : "ae";
10260 gcc_unreachable ();
10263 suffix = fp ? "u" : "p";
10266 suffix = fp ? "nu" : "np";
10269 gcc_unreachable ();
10271 fputs (suffix, file);
10274 /* Print the name of register X to FILE based on its machine mode and number.
10275 If CODE is 'w', pretend the mode is HImode.
10276 If CODE is 'b', pretend the mode is QImode.
10277 If CODE is 'k', pretend the mode is SImode.
10278 If CODE is 'q', pretend the mode is DImode.
10279 If CODE is 'x', pretend the mode is V4SFmode.
10280 If CODE is 't', pretend the mode is V8SFmode.
10281 If CODE is 'h', pretend the reg is the 'high' byte register.
10282 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10283 If CODE is 'd', duplicate the operand for AVX instruction.
10287 print_reg (rtx x, int code, FILE *file)
10290 bool duplicated = code == 'd' && TARGET_AVX;
/* These registers must never reach the assembler output.  */
10292 gcc_assert (x == pc_rtx
10293 || (REGNO (x) != ARG_POINTER_REGNUM
10294 && REGNO (x) != FRAME_POINTER_REGNUM
10295 && REGNO (x) != FLAGS_REG
10296 && REGNO (x) != FPSR_REG
10297 && REGNO (x) != FPCR_REG));
10299 if (ASSEMBLER_DIALECT == ASM_ATT)
10304 gcc_assert (TARGET_64BIT);
10305 fputs ("rip", file);
/* Reduce the mode-override letter to an operand size in bytes; the
   elided branches here assign the corresponding size codes.  */
10309 if (code == 'w' || MMX_REG_P (x))
10311 else if (code == 'b')
10313 else if (code == 'k')
10315 else if (code == 'q')
10317 else if (code == 'y')
10319 else if (code == 'h')
10321 else if (code == 'x')
10323 else if (code == 't')
10326 code = GET_MODE_SIZE (GET_MODE (x));
10328 /* Irritatingly, AMD extended registers use different naming convention
10329 from the normal registers. */
10330 if (REX_INT_REG_P (x))
10332 gcc_assert (TARGET_64BIT);
10336 error ("extended registers have no high halves");
10339 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10342 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10345 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10348 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10351 error ("unsupported operand size for extended register");
10361 if (STACK_TOP_P (x))
/* 8-byte integer regs get the 'r' prefix on 64-bit, 'e' otherwise.  */
10370 if (! ANY_FP_REG_P (x))
10371 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10376 reg = hi_reg_name[REGNO (x)];
10379 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10381 reg = qi_reg_name[REGNO (x)];
10384 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10386 reg = qi_high_reg_name[REGNO (x)];
10391 gcc_assert (!duplicated);
10393 fputs (hi_reg_name[REGNO (x)] + 1, file);
10398 gcc_unreachable ();
/* 'd' with AVX: print the register a second time, comma-separated.  */
10404 if (ASSEMBLER_DIALECT == ASM_ATT)
10405 fprintf (file, ", %%%s", reg);
10407 fprintf (file, ", %s", reg);
10411 /* Locate some local-dynamic symbol still in use by this function
10412 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS SYMBOL_REF
   found into cfun->machine->some_ld_name.  */
10416 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10420 if (GET_CODE (x) == SYMBOL_REF
10421 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10423 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return a local-dynamic TLS symbol name used in this function, caching
   it on cfun->machine.  Scans the insn stream on first request; aborts
   via gcc_unreachable if none is found.  */
10430 static const char *
10431 get_some_local_dynamic_name (void)
10435 if (cfun->machine->some_ld_name)
10436 return cfun->machine->some_ld_name;
10438 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10440 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10441 return cfun->machine->some_ld_name;
10443 gcc_unreachable ();
10446 /* Meaning of CODE:
10447 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10448 C -- print opcode suffix for set/cmov insn.
10449 c -- like C, but print reversed condition
10450 E,e -- likewise, but for compare-and-branch fused insn.
10451 F,f -- likewise, but for floating-point.
10452 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10454 R -- print the prefix for register names.
10455 z -- print the opcode suffix for the size of the current operand.
10456 * -- print a star (in certain assembler syntax)
10457 A -- print an absolute memory reference.
10458 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10459 s -- print a shift double count, followed by the assemblers argument
10461 b -- print the QImode name of the register for the indicated operand.
10462 %b0 would print %al if operands[0] is reg 0.
10463 w -- likewise, print the HImode name of the register.
10464 k -- likewise, print the SImode name of the register.
10465 q -- likewise, print the DImode name of the register.
10466 x -- likewise, print the V4SFmode name of the register.
10467 t -- likewise, print the V8SFmode name of the register.
10468 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10469 y -- print "st(0)" instead of "st" as a register.
10470 d -- print duplicated register operand for AVX instruction.
10471 D -- print condition for SSE cmp instruction.
10472 P -- if PIC, print an @PLT suffix.
10473 X -- don't print any sort of PIC '@' suffix for a symbol.
10474 & -- print some in-use local-dynamic symbol name.
10475 H -- print a memory address offset by 8; used for sse high-parts
10476 Y -- print condition for SSE5 com* instruction.
10477 + -- print a branch hint as 'cs' or 'ds' prefix
10478 ; -- print a semicolon (after prefixes due to bug in older gas).
10482 print_operand (FILE *file, rtx x, int code)
/* NOTE(review): the outer switch on CODE is heavily elided in this
   excerpt; several case labels and break statements are not visible.  */
10489 if (ASSEMBLER_DIALECT == ASM_ATT)
10494 assemble_name (file, get_some_local_dynamic_name ());
10498 switch (ASSEMBLER_DIALECT)
10505 /* Intel syntax. For absolute addresses, registers should not
10506 be surrounded by braces. */
10510 PRINT_OPERAND (file, x, 0);
10517 gcc_unreachable ();
10520 PRINT_OPERAND (file, x, 0);
10525 if (ASSEMBLER_DIALECT == ASM_ATT)
10530 if (ASSEMBLER_DIALECT == ASM_ATT)
10535 if (ASSEMBLER_DIALECT == ASM_ATT)
10540 if (ASSEMBLER_DIALECT == ASM_ATT)
10545 if (ASSEMBLER_DIALECT == ASM_ATT)
10550 if (ASSEMBLER_DIALECT == ASM_ATT)
10555 /* 387 opcodes don't get size suffixes if the operands are
10557 if (STACK_REG_P (x))
10560 /* Likewise if using Intel opcodes. */
10561 if (ASSEMBLER_DIALECT == ASM_INTEL)
10564 /* This is the size of op from size of operand. */
10565 switch (GET_MODE_SIZE (GET_MODE (x)))
10574 #ifdef HAVE_GAS_FILDS_FISTS
10584 if (GET_MODE (x) == SFmode)
10599 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10603 #ifdef GAS_MNEMONICS
10618 gcc_unreachable ();
/* 's': shift-double count, printed only when the assembler needs it.  */
10635 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10637 PRINT_OPERAND (file, x, 0);
10638 fputs (", ", file);
10643 /* Little bit of braindamage here. The SSE compare instructions
10644 does use completely different names for the comparisons that the
10645 fp conditional moves. */
10648 switch (GET_CODE (x))
10651 fputs ("eq", file);
10654 fputs ("eq_us", file);
10657 fputs ("lt", file);
10660 fputs ("nge", file);
10663 fputs ("le", file);
10666 fputs ("ngt", file);
10669 fputs ("unord", file);
10672 fputs ("neq", file);
10675 fputs ("neq_oq", file);
10678 fputs ("ge", file);
10681 fputs ("nlt", file);
10684 fputs ("gt", file);
10687 fputs ("nle", file);
10690 fputs ("ord", file);
10693 gcc_unreachable ();
10698 switch (GET_CODE (x))
10702 fputs ("eq", file);
10706 fputs ("lt", file);
10710 fputs ("le", file);
10713 fputs ("unord", file);
10717 fputs ("neq", file);
10721 fputs ("nlt", file);
10725 fputs ("nle", file);
10728 fputs ("ord", file);
10731 gcc_unreachable ();
/* 'O': Sun assembler cmov mnemonics carry an explicit size letter.  */
10736 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10737 if (ASSEMBLER_DIALECT == ASM_ATT)
10739 switch (GET_MODE (x))
10741 case HImode: putc ('w', file); break;
10743 case SFmode: putc ('l', file); break;
10745 case DFmode: putc ('q', file); break;
10746 default: gcc_unreachable ();
10753 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10756 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10757 if (ASSEMBLER_DIALECT == ASM_ATT)
10760 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10763 /* Like above, but reverse condition */
10765 /* Check to see if argument to %c is really a constant
10766 and not a condition code which needs to be reversed. */
10767 if (!COMPARISON_P (x))
10769 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10772 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10775 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10776 if (ASSEMBLER_DIALECT == ASM_ATT)
10779 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10783 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10787 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10791 /* It doesn't actually matter what mode we use here, as we're
10792 only going to use this for printing. */
10793 x = adjust_address_nv (x, DImode, 8);
/* '+': emit ds/cs branch-hint prefixes based on the REG_BR_PROB note,
   but only when the hint would disagree with the CPU's static
   prediction.  */
10801 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10804 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10807 int pred_val = INTVAL (XEXP (x, 0));
10809 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10810 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10812 int taken = pred_val > REG_BR_PROB_BASE / 2;
10813 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10815 /* Emit hints only in the case default branch prediction
10816 heuristics would fail. */
10817 if (taken != cputaken)
10819 /* We use 3e (DS) prefix for taken branches and
10820 2e (CS) prefix for not taken branches. */
10822 fputs ("ds ; ", file);
10824 fputs ("cs ; ", file);
10832 switch (GET_CODE (x))
10835 fputs ("neq", file);
10838 fputs ("eq", file);
10842 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10846 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10850 fputs ("le", file);
10854 fputs ("lt", file);
10857 fputs ("unord", file);
10860 fputs ("ord", file);
10863 fputs ("ueq", file);
10866 fputs ("nlt", file);
10869 fputs ("nle", file);
10872 fputs ("ule", file);
10875 fputs ("ult", file);
10878 fputs ("une", file);
10881 gcc_unreachable ();
10887 fputs (" ; ", file);
10894 output_operand_lossage ("invalid operand code '%c'", code);
10899 print_reg (x, code, file);
10901 else if (MEM_P (x))
10903 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10904 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10905 && GET_MODE (x) != BLKmode)
10908 switch (GET_MODE_SIZE (GET_MODE (x)))
10910 case 1: size = "BYTE"; break;
10911 case 2: size = "WORD"; break;
10912 case 4: size = "DWORD"; break;
10913 case 8: size = "QWORD"; break;
10914 case 12: size = "XWORD"; break;
10916 if (GET_MODE (x) == XFmode)
10922 gcc_unreachable ();
10925 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10928 else if (code == 'w')
10930 else if (code == 'k')
10933 fputs (size, file);
10934 fputs (" PTR ", file);
10938 /* Avoid (%rip) for call operands. */
10939 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10940 && !CONST_INT_P (x))
10941 output_addr_const (file, x);
10942 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10943 output_operand_lossage ("invalid constraints for operand");
10945 output_address (x);
/* SFmode immediates are emitted as their 32-bit target bit pattern.  */
10948 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10953 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10954 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10956 if (ASSEMBLER_DIALECT == ASM_ATT)
10958 fprintf (file, "0x%08lx", (long unsigned int) l);
10961 /* These float cases don't actually occur as immediate operands. */
10962 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10966 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10967 fprintf (file, "%s", dstr);
10970 else if (GET_CODE (x) == CONST_DOUBLE
10971 && GET_MODE (x) == XFmode)
10975 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10976 fprintf (file, "%s", dstr);
10981 /* We have patterns that allow zero sets of memory, for instance.
10982 In 64-bit mode, we should probably support all 8-byte vectors,
10983 since we can in fact encode that into an immediate. */
10984 if (GET_CODE (x) == CONST_VECTOR)
10986 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
10992 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10994 if (ASSEMBLER_DIALECT == ASM_ATT)
10997 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10998 || GET_CODE (x) == LABEL_REF)
11000 if (ASSEMBLER_DIALECT == ASM_ATT)
11003 fputs ("OFFSET FLAT:", file);
11006 if (CONST_INT_P (x))
11007 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11009 output_pic_addr_const (file, x, code);
11011 output_addr_const (file, x);
11015 /* Print a memory operand whose address is ADDR. */
/* Decomposes ADDR into base/index/scale/displacement via
   ix86_decompose_address and emits it in either AT&T or Intel
   assembler syntax, including segment overrides (fs:/gs:) and
   RIP-relative addressing in 64-bit mode.  */
11018 print_operand_address (FILE *file, rtx addr)
11020 struct ix86_address parts;
11021 rtx base, index, disp;
11023 int ok = ix86_decompose_address (addr, &parts);
11028 index = parts.index;
11030 scale = parts.scale;
/* Non-default segments print an explicit override prefix.  */
11038 if (ASSEMBLER_DIALECT == ASM_ATT)
11040 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11043 gcc_unreachable ();
11046 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11047 if (TARGET_64BIT && !base && !index)
/* Strip a CONST (PLUS symbol const_int) wrapper so the bare symbol
   can be tested below; only non-TLS symbols/labels qualify for the
   RIP-relative form.  */
11051 if (GET_CODE (disp) == CONST
11052 && GET_CODE (XEXP (disp, 0)) == PLUS
11053 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11054 symbol = XEXP (XEXP (disp, 0), 0);
11056 if (GET_CODE (symbol) == LABEL_REF
11057 || (GET_CODE (symbol) == SYMBOL_REF
11058 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11061 if (!base && !index)
11063 /* Displacement only requires special attention. */
11065 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: on a bare constant address.  */
11067 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11068 fputs ("ds:", file);
11069 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11072 output_pic_addr_const (file, disp, 0);
11074 output_addr_const (file, disp);
/* AT&T syntax: displacement first, then (base,index,scale).  */
11078 if (ASSEMBLER_DIALECT == ASM_ATT)
11083 output_pic_addr_const (file, disp, 0);
11084 else if (GET_CODE (disp) == LABEL_REF)
11085 output_asm_label (disp);
11087 output_addr_const (file, disp);
11092 print_reg (base, 0, file);
11096 print_reg (index, 0, file);
11098 fprintf (file, ",%d", scale);
/* Intel syntax path: [base+index*scale+disp] form.  */
11104 rtx offset = NULL_RTX;
11108 /* Pull out the offset of a symbol; print any symbol itself. */
11109 if (GET_CODE (disp) == CONST
11110 && GET_CODE (XEXP (disp, 0)) == PLUS
11111 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11113 offset = XEXP (XEXP (disp, 0), 1)
11114 disp = gen_rtx_CONST (VOIDmode,
11115 XEXP (XEXP (disp, 0), 0));
11119 output_pic_addr_const (file, disp, 0);
11120 else if (GET_CODE (disp) == LABEL_REF)
11121 output_asm_label (disp);
11122 else if (CONST_INT_P (disp))
11125 output_addr_const (file, disp);
11131 print_reg (base, 0, file);
/* Print the numeric offset; NOTE(review): the two identical fprintf
   calls presumably sit on opposite signs of the preceding test, with
   the '+' sign emitted on elided lines -- confirm against full file.  */
11134 if (INTVAL (offset) >= 0)
11136 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11140 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11147 print_reg (index, 0, file);
11149 fprintf (file, "*%d", scale);
/* Target hook: print UNSPEC address constant X -- the TLS relocation
   suffixes (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @INDNTPOFF) and the
   Mach-O PIC offset.  Returns false (on elided lines) when X is not an
   UNSPEC this backend knows how to print.  */
11157 output_addr_const_extra (FILE *file, rtx x)
11161 if (GET_CODE (x) != UNSPEC)
11164 op = XVECEXP (x, 0, 0);
11165 switch (XINT (x, 1))
11167 case UNSPEC_GOTTPOFF:
11168 output_addr_const (file, op);
11169 /* FIXME: This might be @TPOFF in Sun ld. */
11170 fputs ("@GOTTPOFF", file);
11173 output_addr_const (file, op);
11174 fputs ("@TPOFF", file);
11176 case UNSPEC_NTPOFF:
11177 output_addr_const (file, op);
/* @TPOFF vs @NTPOFF: selected by a target test on an elided line.  */
11179 fputs ("@TPOFF", file);
11181 fputs ("@NTPOFF", file);
11183 case UNSPEC_DTPOFF:
11184 output_addr_const (file, op);
11185 fputs ("@DTPOFF", file);
11187 case UNSPEC_GOTNTPOFF:
11188 output_addr_const (file, op);
/* 64-bit uses the RIP-relative @GOTTPOFF form here; 32-bit keeps
   @GOTNTPOFF.  */
11190 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11191 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11193 fputs ("@GOTNTPOFF", file);
11195 case UNSPEC_INDNTPOFF:
11196 output_addr_const (file, op);
11197 fputs ("@INDNTPOFF", file);
11200 case UNSPEC_MACHOPIC_OFFSET:
11201 output_addr_const (file, op);
11203 machopic_output_function_base_name (file);
11214 /* Split one or more DImode RTL references into pairs of SImode
11215 references. The RTL can be REG, offsettable MEM, integer constant, or
11216 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11217 split and "num" is its length. lo_half and hi_half are output arrays
11218 that parallel "operands". */
11221 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11225 rtx op = operands[num];
11227 /* simplify_subreg refuse to split volatile memory addresses,
11228 but we still have to handle it. */
/* MEM path: address arithmetic, low word at byte 0, high at byte 4.  */
11231 lo_half[num] = adjust_address (op, SImode, 0);
11232 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: take SImode subregs; VOIDmode constants are treated
   as DImode so the subreg machinery has a mode to work with.  */
11236 lo_half[num] = simplify_gen_subreg (SImode, op,
11237 GET_MODE (op) == VOIDmode
11238 ? DImode : GET_MODE (op), 0);
11239 hi_half[num] = simplify_gen_subreg (SImode, op,
11240 GET_MODE (op) == VOIDmode
11241 ? DImode : GET_MODE (op), 4);
11245 /* Split one or more TImode RTL references into pairs of DImode
11246 references. The RTL can be REG, offsettable MEM, integer constant, or
11247 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11248 split and "num" is its length. lo_half and hi_half are output arrays
11249 that parallel "operands". */
11252 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11256 rtx op = operands[num];
11258 /* simplify_subreg refuse to split volatile memory addresses, but we
11259 still have to handle it. */
/* Same structure as split_di, but halves are 8 bytes apart.  */
11262 lo_half[num] = adjust_address (op, DImode, 0);
11263 hi_half[num] = adjust_address (op, DImode, 8);
11267 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11268 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11273 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11274 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11275 is the expression of the binary operation. The output may either be
11276 emitted here, or returned to the caller, like all output_* functions.
11278 There is no guarantee that the operands are the same mode, as they
11279 might be within FLOAT or FLOAT_EXTEND expressions. */
11281 #ifndef SYSV386_COMPAT
11282 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11283 wants to fix the assemblers because that causes incompatibility
11284 with gcc. No-one wants to fix gcc because that causes
11285 incompatibility with assemblers... You can use the option of
11286 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11287 #define SYSV386_COMPAT 1
/* Returns the assembler template string for the operation; handles
   both x87 stack-register forms and SSE scalar forms (ss/sd).  */
11291 output_387_binary_op (rtx insn, rtx *operands)
11293 static char buf[40];
11296 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11298 #ifdef ENABLE_CHECKING
11299 /* Even if we do not want to check the inputs, this documents input
11300 constraints. Which helps in understanding the following code. */
11301 if (STACK_REG_P (operands[0])
11302 && ((REG_P (operands[1])
11303 && REGNO (operands[0]) == REGNO (operands[1])
11304 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11305 || (REG_P (operands[2])
11306 && REGNO (operands[0]) == REGNO (operands[2])
11307 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11308 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11311 gcc_assert (is_sse);
/* Pick the base mnemonic (p/ssep prefixes, set on elided lines) from
   the operation code; integer-mode operands select the fi* forms.  */
11314 switch (GET_CODE (operands[3]))
11317 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11318 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11326 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11327 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11335 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11336 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11344 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11345 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11353 gcc_unreachable ();
/* SSE path: three-operand AVX form vs. two-operand legacy form
   (ssep + 1 skips the AVX 'v' prefix byte).  */
11360 strcpy (buf, ssep);
11361 if (GET_MODE (operands[0]) == SFmode)
11362 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11364 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11368 strcpy (buf, ssep + 1);
11369 if (GET_MODE (operands[0]) == SFmode)
11370 strcat (buf, "ss\t{%2, %0|%0, %2}");
11372 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand suffix string P according to which
   operand is on the stack top and whether it dies here (popping).  */
11378 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11382 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11384 rtx temp = operands[2];
11385 operands[2] = operands[1];
11386 operands[1] = temp;
11389 /* know operands[0] == operands[1]. */
11391 if (MEM_P (operands[2]))
11397 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11399 if (STACK_TOP_P (operands[0]))
11400 /* How is it that we are storing to a dead operand[2]?
11401 Well, presumably operands[1] is dead too. We can't
11402 store the result to st(0) as st(0) gets popped on this
11403 instruction. Instead store to operands[2] (which I
11404 think has to be st(1)). st(1) will be popped later.
11405 gcc <= 2.8.1 didn't have this check and generated
11406 assembly code that the Unixware assembler rejected. */
11407 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11409 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11413 if (STACK_TOP_P (operands[0]))
11414 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11416 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters, and the
   reversed r-forms depend on SYSV386_COMPAT (see comment above).  */
11421 if (MEM_P (operands[1]))
11427 if (MEM_P (operands[2]))
11433 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11436 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11437 derived assemblers, confusingly reverse the direction of
11438 the operation for fsub{r} and fdiv{r} when the
11439 destination register is not st(0). The Intel assembler
11440 doesn't have this brain damage. Read !SYSV386_COMPAT to
11441 figure out what the hardware really does. */
11442 if (STACK_TOP_P (operands[0]))
11443 p = "{p\t%0, %2|rp\t%2, %0}";
11445 p = "{rp\t%2, %0|p\t%0, %2}";
11447 if (STACK_TOP_P (operands[0]))
11448 /* As above for fmul/fadd, we can't store to st(0). */
11449 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11451 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11456 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11459 if (STACK_TOP_P (operands[0]))
11460 p = "{rp\t%0, %1|p\t%1, %0}";
11462 p = "{p\t%1, %0|rp\t%0, %1}";
11464 if (STACK_TOP_P (operands[0]))
11465 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11467 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11472 if (STACK_TOP_P (operands[0]))
11474 if (STACK_TOP_P (operands[1]))
11475 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11477 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11480 else if (STACK_TOP_P (operands[1]))
11483 p = "{\t%1, %0|r\t%0, %1}";
11485 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11491 p = "{r\t%2, %0|\t%0, %2}";
11493 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11499 gcc_unreachable ();
11506 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Determines which i387 control-word setting INSN requires:
   UNINITIALIZED after calls/asm, ANY for unrecognized insns or those
   without a specific rounding requirement, else the insn's i387_cw
   attribute (TRUNC/FLOOR/CEIL/MASK_PM).  */
11509 ix86_mode_needed (int entity, rtx insn)
11511 enum attr_i387_cw mode;
11513 /* The mode UNINITIALIZED is used to store control word after a
11514 function call or ASM pattern. The mode ANY specify that function
11515 has no requirements on the control word and make no changes in the
11516 bits we are interested in. */
11519 || (NONJUMP_INSN_P (insn)
11520 && (asm_noperands (PATTERN (insn)) >= 0
11521 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11522 return I387_CW_UNINITIALIZED;
11524 if (recog_memoized (insn) < 0)
11525 return I387_CW_ANY;
11527 mode = get_attr_i387_cw (insn);
/* Dispatch on ENTITY (elided switch): each entity class accepts only
   its matching control-word mode.  */
11532 if (mode == I387_CW_TRUNC)
11537 if (mode == I387_CW_FLOOR)
11542 if (mode == I387_CW_CEIL)
11547 if (mode == I387_CW_MASK_PM)
11552 gcc_unreachable ();
11555 return I387_CW_ANY;
11558 /* Output code to initialize control word copies used by trunc?f?i and
11559 rounding patterns. CURRENT_MODE is set to current control word,
11560 while NEW_MODE is set to new control word. */
/* Stores the current FPU control word with fnstcw, modifies the
   rounding/precision bits in a scratch register for MODE, and saves
   the result in a dedicated stack slot for later fldcw.  */
11563 emit_i387_cw_initialization (int mode)
11565 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11568 enum ix86_stack_slot slot;
11570 rtx reg = gen_reg_rtx (HImode);
11572 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11573 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path: plain 16-bit and/or sequences, used where partial
   register writes stall or when optimizing for size / 64-bit.  */
11575 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11576 || optimize_function_for_size_p (cfun))
11580 case I387_CW_TRUNC:
11581 /* round toward zero (truncate) */
11582 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11583 slot = SLOT_CW_TRUNC;
11586 case I387_CW_FLOOR:
11587 /* round down toward -oo */
11588 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11589 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11590 slot = SLOT_CW_FLOOR;
11594 /* round up toward +oo */
11595 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11596 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11597 slot = SLOT_CW_CEIL;
11600 case I387_CW_MASK_PM:
11601 /* mask precision exception for nearbyint() */
11602 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11603 slot = SLOT_CW_MASK_PM;
11607 gcc_unreachable ();
/* Fast path: insert the 4-bit rounding-control field in one
   movsi_insv_1 bitfield store.  */
11614 case I387_CW_TRUNC:
11615 /* round toward zero (truncate) */
11616 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11617 slot = SLOT_CW_TRUNC;
11620 case I387_CW_FLOOR:
11621 /* round down toward -oo */
11622 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11623 slot = SLOT_CW_FLOOR;
11627 /* round up toward +oo */
11628 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11629 slot = SLOT_CW_CEIL;
11632 case I387_CW_MASK_PM:
11633 /* mask precision exception for nearbyint() */
11634 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11635 slot = SLOT_CW_MASK_PM;
11639 gcc_unreachable ();
11643 gcc_assert (slot < MAX_386_STACK_LOCALS);
11645 new_mode = assign_386_stack_local (HImode, slot);
11646 emit_move_insn (new_mode, reg);
11649 /* Output code for INSN to convert a float to a signed int. OPERANDS
11650 are the insn operands. The output may be [HSD]Imode and the input
11651 operand may be [SDX]Fmode. */
/* Emits fisttp (SSE3) or a fldcw/fist[p]/fldcw sequence; FISTTP
   selects the truncating-store form that needs no control-word
   changes.  */
11654 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11656 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11657 int dimode_p = GET_MODE (operands[0]) == DImode;
11658 int round_mode = get_attr_i387_cw (insn);
11660 /* Jump through a hoop or two for DImode, since the hardware has no
11661 non-popping instruction. We used to do this a different way, but
11662 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
11663 if ((dimode_p || fisttp) && !stack_top_dies)
11664 output_asm_insn ("fld\t%y1", operands);
11666 gcc_assert (STACK_TOP_P (operands[1]));
11667 gcc_assert (MEM_P (operands[0]));
11668 gcc_assert (GET_MODE (operands[1]) != TFmode);
11671 output_asm_insn ("fisttp%z0\t%0", operands);
/* Legacy path: switch rounding mode around the store if needed
   (%3 = truncating CW, %2 = saved CW).  */
11674 if (round_mode != I387_CW_ANY)
11675 output_asm_insn ("fldcw\t%3", operands);
11676 if (stack_top_dies || dimode_p)
11677 output_asm_insn ("fistp%z0\t%0", operands);
11679 output_asm_insn ("fist%z0\t%0", operands);
11680 if (round_mode != I387_CW_ANY)
11681 output_asm_insn ("fldcw\t%2", operands);
11687 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11688 have the values zero or one, indicates the ffreep insn's operand
11689 from the OPERANDS array. */
11691 static const char *
11692 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11694 if (TARGET_USE_FFREEP)
11695 #if HAVE_AS_IX86_FFREEP
11696 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode (0xdf 0xc0+i) via a
   .word directive, patching the register digit into the template.  */
11699 static char retval[] = ".word\t0xc_df";
11700 int regno = REGNO (operands[opno]);
11702 gcc_assert (FP_REGNO_P (regno));
/* retval[9] is the '_' placeholder in "0xc_df".  */
11704 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not beneficial: plain fstp pop.  */
11709 return opno ? "fstp\t%y1" : "fstp\t%y0";
11713 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11714 should be used. UNORDERED_P is true when fucom should be used. */
11717 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11719 int stack_top_dies;
11720 rtx cmp_op0, cmp_op1;
11721 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the fcomi form (ops 0/1) and the
   fnstsw form (ops 1/2); selected on an elided condition.  */
11725 cmp_op0 = operands[0];
11726 cmp_op1 = operands[1];
11730 cmp_op0 = operands[1];
11731 cmp_op1 = operands[2];
/* SSE scalar compares: (v)ucomis[sd] / (v)comis[sd]; indexing the
   template at [TARGET_AVX ? 0 : 1] drops the 'v' prefix on non-AVX.  */
11736 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11737 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11738 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11739 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11741 if (GET_MODE (operands[0]) == SFmode)
11743 return &ucomiss[TARGET_AVX ? 0 : 1];
11745 return &comiss[TARGET_AVX ? 0 : 1];
11748 return &ucomisd[TARGET_AVX ? 0 : 1];
11750 return &comisd[TARGET_AVX ? 0 : 1];
11753 gcc_assert (STACK_TOP_P (cmp_op0));
11755 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, with a pop via ffreep if st(0) dies.  */
11757 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11759 if (stack_top_dies)
11761 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11762 return output_387_ffreep (operands, 1);
11765 return "ftst\n\tfnstsw\t%0";
11768 if (STACK_REG_P (cmp_op1)
11770 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11771 && REGNO (cmp_op1) != FIRST_STACK_REG)
11773 /* If both the top of the 387 stack dies, and the other operand
11774 is also a stack register that dies, then this must be a
11775 `fcompp' float compare */
11779 /* There is no double popping fcomi variant. Fortunately,
11780 eflags is immune from the fstp's cc clobbering. */
11782 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11784 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11785 return output_387_ffreep (operands, 0);
11790 return "fucompp\n\tfnstsw\t%0";
11792 return "fcompp\n\tfnstsw\t%0";
11797 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11799 static const char * const alt[16] =
11801 "fcom%z2\t%y2\n\tfnstsw\t%0",
11802 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11803 "fucom%z2\t%y2\n\tfnstsw\t%0",
11804 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11806 "ficom%z2\t%y2\n\tfnstsw\t%0",
11807 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11811 "fcomi\t{%y1, %0|%0, %y1}",
11812 "fcomip\t{%y1, %0|%0, %y1}",
11813 "fucomi\t{%y1, %0|%0, %y1}",
11814 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index matching the table encoding above.  */
11825 mask = eflags_p << 3;
11826 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11827 mask |= unordered_p << 1;
11828 mask |= stack_top_dies;
11830 gcc_assert (mask < 16);
/* Emit one absolute jump-table element: the address of local label
   LPREFIX<VALUE>, as .long (or .quad where selected on elided lines;
   the assert shows the .long form is 32-bit only).  */
11839 ix86_output_addr_vec_elt (FILE *file, int value)
11841 const char *directive = ASM_LONG;
11845 directive = ASM_QUAD;
11847 gcc_assert (!TARGET_64BIT);
11850 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC-relative jump-table element for label VALUE relative
   to label REL; the form depends on target: label difference, @GOTOFF,
   Mach-O picbase-relative, or GOT-relative.  */
11854 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11856 const char *directive = ASM_LONG;
11859 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11860 directive = ASM_QUAD;
11862 gcc_assert (!TARGET_64BIT);
11864 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11865 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11866 fprintf (file, "%s%s%d-%s%d\n",
11867 directive, LPREFIX, value, LPREFIX, rel);
11868 else if (HAVE_AS_GOTOFF_IN_DATA)
11869 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11871 else if (TARGET_MACHO)
11873 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11874 machopic_output_function_base_name (file);
11875 fprintf(file, "\n");
/* Last resort: express as GOT symbol plus in-section offset.  */
11879 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11880 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11883 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero register DEST post-reload; the xor form clobbers FLAGS, so it
   is wrapped in a PARALLEL with an explicit CC clobber.  */
11887 ix86_expand_clear (rtx dest)
11891 /* We play register width games, which are only valid after reload. */
11892 gcc_assert (reload_completed);
11894 /* Avoid HImode and its attendant prefix byte. */
11895 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11896 dest = gen_rtx_REG (SImode, REGNO (dest));
11897 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11899 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11900 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
11902 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11903 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11909 /* X is an unchanging MEM. If it is a constant pool reference, return
11910 the constant pool rtx, else NULL. */
11913 maybe_get_pool_constant (rtx x)
/* Strip PIC/UNSPEC wrapping from the address before the pool test.  */
11915 x = ix86_delegitimize_address (XEXP (x, 0));
11917 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11918 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport, and PIC symbol references and forcing
   awkward constants into registers or memory as required.  */
11924 ix86_expand_move (enum machine_mode mode, rtx operands[])
11927 enum tls_model model;
/* Plain SYMBOL_REF source: handle TLS and dllimport symbols.  */
11932 if (GET_CODE (op1) == SYMBOL_REF)
11934 model = SYMBOL_REF_TLS_MODEL (op1);
11937 op1 = legitimize_tls_address (op1, model, true);
11938 op1 = force_operand (op1, op0);
11942 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11943 && SYMBOL_REF_DLLIMPORT_P (op1))
11944 op1 = legitimize_dllimport_symbol (op1, false);
/* CONST (PLUS symbol addend): legitimize the symbol part, then
   re-add the addend.  */
11946 else if (GET_CODE (op1) == CONST
11947 && GET_CODE (XEXP (op1, 0)) == PLUS
11948 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11950 rtx addend = XEXP (XEXP (op1, 0), 1);
11951 rtx symbol = XEXP (XEXP (op1, 0), 0);
11954 model = SYMBOL_REF_TLS_MODEL (symbol);
11956 tmp = legitimize_tls_address (symbol, model, true);
11957 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11958 && SYMBOL_REF_DLLIMPORT_P (symbol))
11959 tmp = legitimize_dllimport_symbol (symbol, true);
11963 tmp = force_operand (tmp, NULL);
11964 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11965 op0, 1, OPTAB_DIRECT);
/* PIC: route symbolic operands through the PIC legitimizer.  */
11971 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11973 if (TARGET_MACHO && !TARGET_64BIT)
11978 rtx temp = ((reload_in_progress
11979 || ((op0 && REG_P (op0))
11981 ? op0 : gen_reg_rtx (Pmode));
11982 op1 = machopic_indirect_data_reference (op1, temp);
11983 op1 = machopic_legitimize_pic_address (op1, mode,
11984 temp == op1 ? 0 : temp);
11986 else if (MACHOPIC_INDIRECT)
11987 op1 = machopic_indirect_data_reference (op1, 0);
11995 op1 = force_reg (Pmode, op1);
11996 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11998 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11999 op1 = legitimize_pic_address (op1, reg);
/* Disallow MEM-to-MEM moves except via push.  */
12008 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12009 || !push_operand (op0, mode))
12011 op1 = force_reg (mode, op1);
12013 if (push_operand (op0, mode)
12014 && ! general_no_elim_operand (op1, mode))
12015 op1 = copy_to_mode_reg (mode, op1);
12017 /* Force large constants in 64bit compilation into register
12018 to get them CSEed. */
12019 if (can_create_pseudo_p ()
12020 && (mode == DImode) && TARGET_64BIT
12021 && immediate_operand (op1, mode)
12022 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12023 && !register_operand (op0, mode)
12025 op1 = copy_to_mode_reg (mode, op1);
12027 if (can_create_pseudo_p ()
12028 && FLOAT_MODE_P (mode)
12029 && GET_CODE (op1) == CONST_DOUBLE)
12031 /* If we are loading a floating point constant to a register,
12032 force the value to memory now, since we'll get better code
12033 out the back end. */
12035 op1 = validize_mem (force_const_mem (mode, op1));
12036 if (!register_operand (op0, mode))
12038 rtx temp = gen_reg_rtx (mode);
12039 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12040 emit_move_insn (op0, temp);
12046 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing non-zero constants to the
   constant pool and routing under-aligned SSE memory operands through
   the misaligned-move expander.  */
12050 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12052 rtx op0 = operands[0], op1 = operands[1];
12053 unsigned int align = GET_MODE_ALIGNMENT (mode);
12055 /* Force constants other than zero into memory. We do not know how
12056 the instructions used to build constants modify the upper 64 bits
12057 of the register, once we have that information we may be able
12058 to handle some of them more efficiently. */
12059 if (can_create_pseudo_p ()
12060 && register_operand (op0, mode)
12061 && (CONSTANT_P (op1)
12062 || (GET_CODE (op1) == SUBREG
12063 && CONSTANT_P (SUBREG_REG (op1))))
12064 && standard_sse_constant_p (op1) <= 0)
12065 op1 = validize_mem (force_const_mem (mode, op1));
12067 /* We need to check memory alignment for SSE mode since attribute
12068 can make operands unaligned. */
12069 if (can_create_pseudo_p ()
12070 && SSE_REG_MODE_P (mode)
12071 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12072 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12076 /* ix86_expand_vector_move_misalign() does not like constants ... */
12077 if (CONSTANT_P (op1)
12078 || (GET_CODE (op1) == SUBREG
12079 && CONSTANT_P (SUBREG_REG (op1))))
12080 op1 = validize_mem (force_const_mem (mode, op1));
12082 /* ... nor both arguments in memory. */
12083 if (!register_operand (op0, mode)
12084 && !register_operand (op1, mode))
12085 op1 = force_reg (mode, op1);
12087 tmp[0] = op0; tmp[1] = op1;
12088 ix86_expand_vector_move_misalign (mode, tmp);
12092 /* Make operand1 a register if it isn't already. */
12093 if (can_create_pseudo_p ()
12094 && !register_operand (op0, mode)
12095 && !register_operand (op1, mode))
12097 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12101 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12104 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12105 straight to ix86_expand_vector_move. */
12106 /* Code generation for scalar reg-reg moves of single and double precision data:
12107 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12111 if (x86_sse_partial_reg_dependency == true)
12116 Code generation for scalar loads of double precision data:
12117 if (x86_sse_split_regs == true)
12118 movlpd mem, reg (gas syntax)
12122 Code generation for unaligned packed loads of single precision data
12123 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12124 if (x86_sse_unaligned_move_optimal)
12127 if (x86_sse_partial_reg_dependency == true)
12139 Code generation for unaligned packed loads of double precision data
12140 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12141 if (x86_sse_unaligned_move_optimal)
12144 if (x86_sse_split_regs == true)
12157 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path: single unaligned vmovdqu/vmovup[sd] of the full vector,
   chosen by mode class and size.  */
12166 switch (GET_MODE_CLASS (mode))
12168 case MODE_VECTOR_INT:
12170 switch (GET_MODE_SIZE (mode))
12173 op0 = gen_lowpart (V16QImode, op0);
12174 op1 = gen_lowpart (V16QImode, op1);
12175 emit_insn (gen_avx_movdqu (op0, op1));
12178 op0 = gen_lowpart (V32QImode, op0);
12179 op1 = gen_lowpart (V32QImode, op1);
12180 emit_insn (gen_avx_movdqu256 (op0, op1));
12183 gcc_unreachable ();
12186 case MODE_VECTOR_FLOAT:
12187 op0 = gen_lowpart (mode, op0);
12188 op1 = gen_lowpart (mode, op1);
12193 emit_insn (gen_avx_movups (op0, op1));
12196 emit_insn (gen_avx_movups256 (op0, op1));
12199 emit_insn (gen_avx_movupd (op0, op1));
12202 emit_insn (gen_avx_movupd256 (op0, op1));
12205 gcc_unreachable ();
12210 gcc_unreachable ();
/* Non-AVX load path (unaligned source in memory).  */
12218 /* If we're optimizing for size, movups is the smallest. */
12219 if (optimize_insn_for_size_p ())
12221 op0 = gen_lowpart (V4SFmode, op0);
12222 op1 = gen_lowpart (V4SFmode, op1);
12223 emit_insn (gen_sse_movups (op0, op1));
12227 /* ??? If we have typed data, then it would appear that using
12228 movdqu is the only way to get unaligned data loaded with
12230 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12232 op0 = gen_lowpart (V16QImode, op0);
12233 op1 = gen_lowpart (V16QImode, op1);
12234 emit_insn (gen_sse2_movdqu (op0, op1));
12238 if (TARGET_SSE2 && mode == V2DFmode)
12242 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12244 op0 = gen_lowpart (V2DFmode, op0);
12245 op1 = gen_lowpart (V2DFmode, op1);
12246 emit_insn (gen_sse2_movupd (op0, op1));
12250 /* When SSE registers are split into halves, we can avoid
12251 writing to the top half twice. */
12252 if (TARGET_SSE_SPLIT_REGS)
12254 emit_clobber (op0);
12259 /* ??? Not sure about the best option for the Intel chips.
12260 The following would seem to satisfy; the register is
12261 entirely cleared, breaking the dependency chain. We
12262 then store to the upper half, with a dependency depth
12263 of one. A rumor has it that Intel recommends two movsd
12264 followed by an unpacklpd, but this is unconfirmed. And
12265 given that the dependency depth of the unpacklpd would
12266 still be one, I'm not sure why this would be better. */
12267 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately (loadlpd/loadhpd).  */
12270 m = adjust_address (op1, DFmode, 0);
12271 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12272 m = adjust_address (op1, DFmode, 8);
12273 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12277 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12279 op0 = gen_lowpart (V4SFmode, op0);
12280 op1 = gen_lowpart (V4SFmode, op1);
12281 emit_insn (gen_sse_movups (op0, op1));
/* SF case: clear or clobber the destination first to break the
   partial-register dependency, then load halves via loadlps/loadhps.  */
12285 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12286 emit_move_insn (op0, CONST0_RTX (mode));
12288 emit_clobber (op0);
12290 if (mode != V4SFmode)
12291 op0 = gen_lowpart (V4SFmode, op0);
12292 m = adjust_address (op1, V2SFmode, 0);
12293 emit_insn (gen_sse_loadlps (op0, op0, m));
12294 m = adjust_address (op1, V2SFmode, 8);
12295 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path (unaligned destination in memory).  */
12298 else if (MEM_P (op0))
12300 /* If we're optimizing for size, movups is the smallest. */
12301 if (optimize_insn_for_size_p ())
12303 op0 = gen_lowpart (V4SFmode, op0);
12304 op1 = gen_lowpart (V4SFmode, op1);
12305 emit_insn (gen_sse_movups (op0, op1));
12309 /* ??? Similar to above, only less clear because of quote
12310 typeless stores unquote. */
12311 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12312 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12314 op0 = gen_lowpart (V16QImode, op0);
12315 op1 = gen_lowpart (V16QImode, op1);
12316 emit_insn (gen_sse2_movdqu (op0, op1));
12320 if (TARGET_SSE2 && mode == V2DFmode)
12322 m = adjust_address (op0, DFmode, 0);
12323 emit_insn (gen_sse2_storelpd (m, op1));
12324 m = adjust_address (op0, DFmode, 8);
12325 emit_insn (gen_sse2_storehpd (m, op1));
12329 if (mode != V4SFmode)
12330 op1 = gen_lowpart (V4SFmode, op1);
12331 m = adjust_address (op0, V2SFmode, 0);
12332 emit_insn (gen_sse_storelps (m, op1));
12333 m = adjust_address (op0, V2SFmode, 8);
12334 emit_insn (gen_sse_storehps (m, op1));
12338 gcc_unreachable ();
12341 /* Expand a push in MODE. This is some mode for which we do not support
12342 proper push instructions, at least from the registers that we expect
12343 the value to live in. */
/* Emulate push: decrement the stack pointer by the mode size, then
   store X through it.  */
12346 ix86_expand_push (enum machine_mode mode, rtx x)
12350 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12351 GEN_INT (-GET_MODE_SIZE (mode)),
12352 stack_pointer_rtx, 1, OPTAB_DIRECT);
12353 if (tmp != stack_pointer_rtx)
12354 emit_move_insn (stack_pointer_rtx, tmp);
12356 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12357 emit_move_insn (tmp, x);
12360 /* Helper function of ix86_fixup_binary_operands to canonicalize
12361 operand order. Returns true if the operands should be swapped. */
12364 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12367 rtx dst = operands[0];
12368 rtx src1 = operands[1];
12369 rtx src2 = operands[2];
12371 /* If the operation is not commutative, we can't do anything. */
12372 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12375 /* Highest priority is that src1 should match dst. */
12376 if (rtx_equal_p (dst, src1))
12378 if (rtx_equal_p (dst, src2))
12381 /* Next highest priority is that immediate constants come second. */
12382 if (immediate_operand (src2, mode))
12384 if (immediate_operand (src1, mode))
12387 /* Lowest priority is that memory references should come second. */
12397 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12398 destination to use for the operation. If different from the true
12399 destination in operands[0], a copy operation will be required. */
12402 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12405 rtx dst = operands[0];
12406 rtx src1 = operands[1];
12407 rtx src2 = operands[2];
12409 /* Canonicalize operand order. */
12410 if (ix86_swap_binary_operands_p (code, mode, operands))
12414 /* It is invalid to swap operands of different modes. */
12415 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12422 /* Both source operands cannot be in memory. */
12423 if (MEM_P (src1) && MEM_P (src2))
12425 /* Optimization: Only read from memory once. */
12426 if (rtx_equal_p (src1, src2))
/* Identical MEMs: force one into a register and reuse it for both.  */
12428 src2 = force_reg (mode, src2);
12432 src2 = force_reg (mode, src2);
12435 /* If the destination is memory, and we do not have matching source
12436 operands, do things in registers. */
12437 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12438 dst = gen_reg_rtx (mode);
12440 /* Source 1 cannot be a constant. */
12441 if (CONSTANT_P (src1))
12442 src1 = force_reg (mode, src1);
12444 /* Source 1 cannot be a non-matching memory. */
12445 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12446 src1 = force_reg (mode, src1);
12448 operands[1] = src1;
12449 operands[2] = src2;
12453 /* Similarly, but assume that the destination has already been
12454 set up properly. */
12457 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12458 enum machine_mode mode, rtx operands[])
/* The fixup must not have needed a fresh destination register.  */
12460 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12461 gcc_assert (dst == operands[0]);
12464 /* Attempt to expand a binary operator. Make the expansion closer to the
12465 actual machine, then just general_operand, which will allow 3 separate
12466 memory references (one output, two input) in a single insn. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Emits
   dst = src1 CODE src2 with a FLAGS_REG clobber, then copies to the
   real operands[0] when fixup substituted a fresh register.  */
12469 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12472 rtx src1, src2, dst, op, clob;
12474 dst = ix86_fixup_binary_operands (code, mode, operands);
12475 src1 = operands[1];
12476 src2 = operands[2];
12478 /* Emit the instruction. */
12480 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12481 if (reload_in_progress)
12483 /* Reload doesn't know about the flags register, and doesn't know that
12484 it doesn't want to clobber it. We can only do this with PLUS. */
12485 gcc_assert (code == PLUS);
/* Normal path: wrap the SET in a PARALLEL with an explicit flags
   clobber, since x86 arithmetic insns clobber EFLAGS.  */
12490 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12491 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12494 /* Fix up the destination if needed. */
12495 if (dst != operands[0])
12496 emit_move_insn (operands[0], dst);
12499 /* Return TRUE or FALSE depending on whether the binary operator meets the
12500 appropriate constraints. */
/* NOTE(review): excerpt elides the return statements between the
   checks; code text kept verbatim.  Predicate counterpart of
   ix86_fixup_binary_operands — validates, never rewrites.  */
12503 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12506 rtx dst = operands[0];
12507 rtx src1 = operands[1];
12508 rtx src2 = operands[2];
12510 /* Both source operands cannot be in memory. */
12511 if (MEM_P (src1) && MEM_P (src2))
12514 /* Canonicalize operand order for commutative operators. */
12515 if (ix86_swap_binary_operands_p (code, mode, operands))
12522 /* If the destination is memory, we must have a matching source operand. */
12523 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12526 /* Source 1 cannot be a constant. */
12527 if (CONSTANT_P (src1))
12530 /* Source 1 cannot be a non-matching memory. */
12531 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12537 /* Attempt to expand a unary operator. Make the expansion closer to the
12538 actual machine, then just general_operand, which will allow 2 separate
12539 memory references (one output, one input) in a single insn. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Mirrors
   ix86_expand_binary_operator for single-operand ops (NEG/NOT etc.).  */
12542 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12545 int matching_memory;
12546 rtx src, dst, op, clob;
12551 /* If the destination is memory, and we do not have matching source
12552 operands, do things in registers. */
12553 matching_memory = 0;
12556 if (rtx_equal_p (dst, src))
12557 matching_memory = 1;
12559 dst = gen_reg_rtx (mode);
12562 /* When source operand is memory, destination must match. */
12563 if (MEM_P (src) && !matching_memory)
12564 src = force_reg (mode, src);
12566 /* Emit the instruction. */
12568 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber EFLAGS on x86, so it is emitted without the
   clobber; everything else (e.g. NEG) gets the flags clobber.  */
12569 if (reload_in_progress || code == NOT)
12571 /* Reload doesn't know about the flags register, and doesn't know that
12572 it doesn't want to clobber it. */
12573 gcc_assert (code == NOT)
12578 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12579 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12582 /* Fix up the destination if needed. */
12583 if (dst != operands[0])
12584 emit_move_insn (operands[0], dst);
12587 /* Return TRUE or FALSE depending on whether the unary operator meets the
12588 appropriate constraints. */
/* Only constraint: a memory operand on either side forces the two
   operands to match (x86 unary insns are read-modify-write on memory).  */
12591 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12592 enum machine_mode mode ATTRIBUTE_UNUSED,
12593 rtx operands[2] ATTRIBUTE_UNUSED)
12595 /* If one of operands is memory, source and destination must match. */
12596 if ((MEM_P (operands[0])
12597 || MEM_P (operands[1]))
12598 && ! rtx_equal_p (operands[0], operands[1]))
12603 /* Post-reload splitter for converting an SF or DFmode value in an
12604 SSE register into an unsigned SImode. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Strategy:
   values >= 2^31 have 2^31 subtracted before the signed cvtt, and the
   sign bit is xor'ed back in afterwards, emulating an unsigned
   conversion with only signed SSE conversion instructions.  */
12607 ix86_split_convert_uns_si_sse (rtx operands[])
12609 enum machine_mode vecmode;
12610 rtx value, large, zero_or_two31, input, two31, x;
12612 large = operands[1];
12613 zero_or_two31 = operands[2];
12614 input = operands[3];
12615 two31 = operands[4];
12616 vecmode = GET_MODE (large);
12617 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12619 /* Load up the value into the low element. We must ensure that the other
12620 elements are valid floats -- zero is the easiest such value. */
12623 if (vecmode == V4SFmode)
12624 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12626 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12630 input = gen_rtx_REG (vecmode, REGNO (input));
12631 emit_move_insn (value, CONST0_RTX (vecmode));
12632 if (vecmode == V4SFmode)
12633 emit_insn (gen_sse_movss (value, value, input));
12635 emit_insn (gen_sse2_movsd (value, value, input));
12638 emit_move_insn (large, two31);
12639 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (large <= value) mask; zero_or_two31 = mask ? 2^31 : 0.  */
12641 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12642 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12644 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12645 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12647 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12648 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the compare mask up to bit 31 so it can restore the high bit
   of the integer result via XOR after the truncating convert.  */
12650 large = gen_rtx_REG (V4SImode, REGNO (large));
12651 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12653 x = gen_rtx_REG (V4SImode, REGNO (value));
12654 if (vecmode == V4SFmode)
12655 emit_insn (gen_sse2_cvttps2dq (x, value));
12657 emit_insn (gen_sse2_cvttpd2dq (x, value));
12660 emit_insn (gen_xorv4si3 (value, value, large));
12663 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12664 Expects the 64-bit DImode to be supplied in a pair of integral
12665 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12666 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Classic
   bias trick: splice exponent words 0x43300000 (2^52) / 0x45300000
   (2^84) onto the low/high input halves, subtract the biases, then
   sum the two doubles.  */
12669 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12671 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12672 rtx int_xmm, fp_xmm;
12673 rtx biases, exponents;
12676 int_xmm = gen_reg_rtx (V4SImode);
12677 if (TARGET_INTER_UNIT_MOVES)
12678 emit_insn (gen_movdi_to_sse (int_xmm, input));
12679 else if (TARGET_SSE_SPLIT_REGS)
12681 emit_clobber (int_xmm);
12682 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12686 x = gen_reg_rtx (V2DImode);
12687 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12688 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12691 x = gen_rtx_CONST_VECTOR (V4SImode,
12692 gen_rtvec (4, GEN_INT (0x43300000UL),
12693 GEN_INT (0x45300000UL),
12694 const0_rtx, const0_rtx));
12695 exponents = validize_mem (force_const_mem (V4SImode, x));
12697 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12698 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12700 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12701 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12702 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12703 (0x1.0p84 + double(fp_value_hi_xmm)).
12704 Note these exponents differ by 32. */
12706 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12708 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12709 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12710 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12711 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12712 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12713 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12714 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12715 biases = validize_mem (force_const_mem (V2DFmode, biases));
12716 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12718 /* Add the upper and lower DFmode values together. */
/* SSE3 path uses horizontal add; SSE2 fallback shuffles the high half
   down and adds.  */
12720 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12723 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12724 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12725 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12728 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12731 /* Not used, but eases macroization of patterns. */
/* Stub: XFmode has no SSE conversion path, so reaching here is a bug.  */
12733 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12734 rtx input ATTRIBUTE_UNUSED)
12736 gcc_unreachable ();
12739 /* Convert an unsigned SImode value into a DFmode. Only currently used
12740 for SSE, but applicable anywhere. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Flips the
   sign bit (x - 2^31 as signed), converts signed, then adds 2^31.0
   back in DFmode, which is exact since DF has 53 mantissa bits.  */
12743 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12745 REAL_VALUE_TYPE TWO31r;
12748 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12749 NULL, 1, OPTAB_DIRECT);
12751 fp = gen_reg_rtx (DFmode);
12752 emit_insn (gen_floatsidf2 (fp, x));
12754 real_ldexp (&TWO31r, &dconst1, 31);
12755 x = const_double_from_real_value (TWO31r, DFmode);
12757 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12759 emit_move_insn (target, x);
12762 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12763 32-bit mode; otherwise we have a direct convert instruction. */
/* result = (double)hi * 2^32 + (double)(unsigned)lo — the high word is
   converted signed, the low word via the unsigned SImode helper.  */
12766 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12768 REAL_VALUE_TYPE TWO32r;
12769 rtx fp_lo, fp_hi, x;
12771 fp_lo = gen_reg_rtx (DFmode);
12772 fp_hi = gen_reg_rtx (DFmode);
12774 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12776 real_ldexp (&TWO32r, &dconst1, 32);
12777 x = const_double_from_real_value (TWO32r, DFmode);
12778 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12780 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12782 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12785 emit_move_insn (target, x);
12788 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12789 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Splits the 32-bit value into 16-bit halves (each exactly
   representable in SF), converts both signed, and recombines as
   hi * 2^16 + lo.  */
12791 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12793 REAL_VALUE_TYPE ONE16r;
12794 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12796 real_ldexp (&ONE16r, &dconst1, 16);
12797 x = const_double_from_real_value (ONE16r, SFmode);
12798 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12799 NULL, 0, OPTAB_DIRECT);
12800 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12801 NULL, 0, OPTAB_DIRECT);
12802 fp_hi = gen_reg_rtx (SFmode);
12803 fp_lo = gen_reg_rtx (SFmode);
12804 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12805 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12806 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12808 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12810 if (!rtx_equal_p (target, fp_hi))
12811 emit_move_insn (target, fp_hi);
12814 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12815 then replicate the value for all elements of the vector
/* NOTE(review): excerpt elides the switch/case scaffolding; the pairs
   of gen_rtvec calls below are the VECT-true (broadcast) vs VECT-false
   (low element only, rest zero) alternatives per mode.  */
12819 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12826 v = gen_rtvec (4, value, value, value, value);
12827 return gen_rtx_CONST_VECTOR (V4SImode, v);
12831 v = gen_rtvec (2, value, value);
12832 return gen_rtx_CONST_VECTOR (V2DImode, v);
12836 v = gen_rtvec (4, value, value, value, value);
12838 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12839 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12840 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12844 v = gen_rtvec (2, value, value);
12846 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12847 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12850 gcc_unreachable ();
12854 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12855 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12856 for an SSE register. If VECT is true, then replicate the mask for
12857 all elements of the vector register. If INVERT is true, then create
12858 a mask excluding the sign bit. */
/* NOTE(review): excerpt elides the per-mode switch labels; code kept
   verbatim.  The (lo, hi) pair is the sign-bit constant split across
   two HOST_WIDE_INTs, handling both 32- and 64-bit hosts.  */
12861 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12863 enum machine_mode vec_mode, imode;
12864 HOST_WIDE_INT hi, lo;
12869 /* Find the sign bit, sign extended to 2*HWI. */
12875 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12876 lo = 0x80000000, hi = lo < 0;
12882 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12883 if (HOST_BITS_PER_WIDE_INT >= 64)
12884 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12886 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12891 vec_mode = VOIDmode;
12892 if (HOST_BITS_PER_WIDE_INT >= 64)
12895 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12902 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT flips the mask so that AND-ing clears only the sign bit.  */
12906 lo = ~lo, hi = ~hi;
12912 mask = immed_double_const (lo, hi, imode);
12914 vec = gen_rtvec (2, v, mask);
12915 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12916 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12923 gcc_unreachable ();
12927 lo = ~lo, hi = ~hi;
12929 /* Force this value into the low part of a fp vector constant. */
12930 mask = immed_double_const (lo, hi, imode);
12931 mask = gen_lowpart (mode, mask);
12933 if (vec_mode == VOIDmode)
12934 return force_reg (mode, mask);
12936 v = ix86_build_const_vector (mode, vect, mask);
12937 return force_reg (vec_mode, v);
12940 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  SSE path
   implements NEG as XOR with the sign-bit mask and ABS as AND with the
   inverted mask; x87 path emits the plain rtx with a USE of the mask
   plus a flags clobber.  */
12943 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12946 rtx mask, set, use, clob, dst, src;
12947 bool use_sse = false;
12948 bool vector_mode = VECTOR_MODE_P (mode);
12949 enum machine_mode elt_mode = mode;
12953 elt_mode = GET_MODE_INNER (mode);
12956 else if (mode == TFmode)
12958 else if (TARGET_SSE_MATH)
12959 use_sse = SSE_FLOAT_MODE_P (mode);
12961 /* NEG and ABS performed with SSE use bitwise mask operations.
12962 Create the appropriate mask now. */
12964 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12973 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12974 set = gen_rtx_SET (VOIDmode, dst, set);
12979 set = gen_rtx_fmt_e (code, mode, src);
12980 set = gen_rtx_SET (VOIDmode, dst, set);
12983 use = gen_rtx_USE (VOIDmode, mask);
12984 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12985 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12986 gen_rtvec (3, set, use, clob)));
12993 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Constant
   magnitude (CONST_DOUBLE op0) uses the *_const patterns with one
   mask; a variable magnitude needs both the sign mask and its
   complement for the *_var patterns.  */
12996 ix86_expand_copysign (rtx operands[])
12998 enum machine_mode mode;
12999 rtx dest, op0, op1, mask, nmask;
13001 dest = operands[0];
13005 mode = GET_MODE (dest);
13007 if (GET_CODE (op0) == CONST_DOUBLE)
13009 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign of a negative constant: copysign supplies the sign
   from op1 anyway, so only |op0| matters.  */
13011 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13012 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13014 if (mode == SFmode || mode == DFmode)
13016 enum machine_mode vmode;
13018 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13020 if (op0 == CONST0_RTX (mode))
13021 op0 = CONST0_RTX (vmode);
13026 if (mode == SFmode)
13027 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13028 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13030 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13032 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13035 else if (op0 != CONST0_RTX (mode))
13036 op0 = force_reg (mode, op0);
13038 mask = ix86_build_signbit_mask (mode, 0, 0);
13040 if (mode == SFmode)
13041 copysign_insn = gen_copysignsf3_const;
13042 else if (mode == DFmode)
13043 copysign_insn = gen_copysigndf3_const;
13045 copysign_insn = gen_copysigntf3_const;
13047 emit_insn (copysign_insn (dest, op0, op1, mask));
13051 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13053 nmask = ix86_build_signbit_mask (mode, 0, 1);
13054 mask = ix86_build_signbit_mask (mode, 0, 0);
13056 if (mode == SFmode)
13057 copysign_insn = gen_copysignsf3_var;
13058 else if (mode == DFmode)
13059 copysign_insn = gen_copysigndf3_var;
13061 copysign_insn = gen_copysigntf3_var;
13063 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13067 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13068 be a constant, and so has already been expanded into a vector constant. */
/* dest = (op1 & sign-mask) | op0, done in the vector mode of the mask;
   the IOR is skipped entirely when |op0| is zero.  */
13071 ix86_split_copysign_const (rtx operands[])
13073 enum machine_mode mode, vmode;
13074 rtx dest, op0, op1, mask, x;
13076 dest = operands[0];
13079 mask = operands[3];
13081 mode = GET_MODE (dest);
13082 vmode = GET_MODE (mask);
13084 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13085 x = gen_rtx_AND (vmode, dest, mask);
13086 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13088 if (op0 != CONST0_RTX (vmode))
13090 x = gen_rtx_IOR (vmode, dest, op0);
13091 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13095 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13096 so we have to do two masks. */
/* NOTE(review): excerpt elides lines; code kept verbatim.  Computes
   dest = (op0 & ~sign-mask) | (op1 & sign-mask); the branches below
   correspond to the insn pattern's register-tying alternatives, each
   asserted via REGNO checks.  */
13099 ix86_split_copysign_var (rtx operands[])
13101 enum machine_mode mode, vmode;
13102 rtx dest, scratch, op0, op1, mask, nmask, x;
13104 dest = operands[0];
13105 scratch = operands[1];
13108 nmask = operands[4];
13109 mask = operands[5];
13111 mode = GET_MODE (dest);
13112 vmode = GET_MODE (mask);
13114 if (rtx_equal_p (op0, op1))
13116 /* Shouldn't happen often (it's useless, obviously), but when it does
13117 we'd generate incorrect code if we continue below. */
13118 emit_move_insn (dest, op0);
13122 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13124 gcc_assert (REGNO (op1) == REGNO (scratch));
13126 x = gen_rtx_AND (vmode, scratch, mask);
13127 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13130 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13131 x = gen_rtx_NOT (vmode, dest);
13132 x = gen_rtx_AND (vmode, x, op0);
13133 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13137 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13139 x = gen_rtx_AND (vmode, scratch, mask);
13141 else /* alternative 2,4 */
13143 gcc_assert (REGNO (mask) == REGNO (scratch));
13144 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13145 x = gen_rtx_AND (vmode, scratch, op1);
13147 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13149 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13151 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13152 x = gen_rtx_AND (vmode, dest, nmask);
13154 else /* alternative 3,4 */
13156 gcc_assert (REGNO (nmask) == REGNO (dest));
13158 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13159 x = gen_rtx_AND (vmode, dest, op0);
13161 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13164 x = gen_rtx_IOR (vmode, dest, scratch);
13165 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13168 /* Return TRUE or FALSE depending on whether the first SET in INSN
13169 has source and destination with matching CC modes, and that the
13170 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): excerpt elides the switch arms mapping set_mode to the
   acceptable req_modes; code kept verbatim.  */
13173 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13176 enum machine_mode set_mode;
13178 set = PATTERN (insn);
13179 if (GET_CODE (set) == PARALLEL)
13180 set = XVECEXP (set, 0, 0);
13181 gcc_assert (GET_CODE (set) == SET);
13182 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13184 set_mode = GET_MODE (SET_DEST (set));
/* CCNOmode/CCmode-with-zero are the only forms accepted here;
   presumably this arm handles set_mode == CCNOmode — TODO confirm
   against the elided case label.  */
13188 if (req_mode != CCNOmode
13189 && (req_mode != CCmode
13190 || XEXP (SET_SRC (set), 1) != const0_rtx))
13194 if (req_mode == CCGCmode)
13198 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13202 if (req_mode == CCZmode)
13213 gcc_unreachable ();
13216 return (GET_MODE (SET_SRC (set)) == set_mode);
13219 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits flags = COMPARE (op0, op1) in the CC mode selected for CODE
   and returns the CODE test rtx the flags consumer should use.  */
13222 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13224 enum machine_mode cmpmode;
13227 cmpmode = SELECT_CC_MODE (code, op0, op1);
13228 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13230 /* This is very simple, but making the interface the same as in the
13231 FP case makes the rest of the code easier. */
13232 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13233 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13235 /* Return the test that should be put into the flags user, i.e.
13236 the bcc, scc, or cmov instruction. */
13237 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13240 /* Figure out whether to use ordered or unordered fp comparisons.
13241 Return the appropriate mode to use. */
/* CODE is currently ignored: under -mieee-fp every FP compare is made
   non-trapping (CCFPUmode) for reversibility, per the comment below.  */
13244 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13246 /* ??? In order to make all comparisons reversible, we do all comparisons
13247 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13248 all forms trapping and nontrapping comparisons, we can make inequality
13249 comparisons trapping again, since it results in better code when using
13250 FCOM based compares. */
13251 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to test CODE on op0/op1.
   NOTE(review): excerpt elides the returned mode on each arm (e.g.
   which of CCZmode/CCmode/CCGOCmode each case yields); code kept
   verbatim.  */
13255 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13257 enum machine_mode mode = GET_MODE (op0);
13259 if (SCALAR_FLOAT_MODE_P (mode))
13261 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13262 return ix86_fp_compare_mode (code);
13267 /* Only zero flag is needed. */
13268 case EQ: /* ZF=0 */
13269 case NE: /* ZF!=0 */
13271 /* Codes needing carry flag. */
13272 case GEU: /* CF=0 */
13273 case LTU: /* CF=1 */
13274 /* Detect overflow checks. They need just the carry flag. */
13275 if (GET_CODE (op0) == PLUS
13276 && rtx_equal_p (op1, XEXP (op0, 0)))
13280 case GTU: /* CF=0 & ZF=0 */
13281 case LEU: /* CF=1 | ZF=1 */
13282 /* Detect overflow checks. They need just the carry flag. */
13283 if (GET_CODE (op0) == MINUS
13284 && rtx_equal_p (op1, XEXP (op0, 0)))
13288 /* Codes possibly doable only with sign flag when
13289 comparing against zero. */
13290 case GE: /* SF=OF or SF=0 */
13291 case LT: /* SF<>OF or SF=1 */
13292 if (op1 == const0_rtx)
13295 /* For other cases Carry flag is not required. */
13297 /* Codes doable only with sign flag when comparing
13298 against zero, but we miss jump instruction for it
13299 so we need to use relational tests against overflow
13300 that thus needs to be zero. */
13301 case GT: /* ZF=0 & SF=OF */
13302 case LE: /* ZF=1 | SF<>OF */
13303 if (op1 == const0_rtx)
13307 /* strcmp pattern do (use flags) and combine may ask us for proper
13312 gcc_unreachable ();
13316 /* Return the fixed registers used for condition codes. */
/* Target hook: stores the CC register numbers through P1/P2 (body
   elided in this excerpt).  */
13319 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13326 /* If two condition code modes are compatible, return a condition code
13327 mode which is compatible with both. Otherwise, return
/* NOTE(review): excerpt elides the self-compatible cases and return
   values; CCGC/CCGOC merge to their common (elided) result.  */
13330 static enum machine_mode
13331 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13336 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13339 if ((m1 == CCGCmode && m2 == CCGOCmode)
13340 || (m1 == CCGOCmode && m2 == CCGCmode))
13346 gcc_unreachable ();
13376 /* These are only compatible with themselves, which we already
13382 /* Split comparison code CODE into comparisons we can do using branch
13383 instructions. BYPASS_CODE is comparison code for branch that will
13384 branch around FIRST_CODE and SECOND_CODE. If some of branches
13385 is not required, set value to UNKNOWN.
13386 We never require more than two branches. */
/* NOTE(review): excerpt elides break statements and some case bodies;
   code kept verbatim.  The extra UNORDERED bypass/second branches
   exist only to get NaN semantics right, hence they are dropped when
   !TARGET_IEEE_FP at the end.  */
13389 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13390 enum rtx_code *first_code,
13391 enum rtx_code *second_code)
13393 *first_code = code;
13394 *bypass_code = UNKNOWN;
13395 *second_code = UNKNOWN;
13397 /* The fcomi comparison sets flags as follows:
13407 case GT: /* GTU - CF=0 & ZF=0 */
13408 case GE: /* GEU - CF=0 */
13409 case ORDERED: /* PF=0 */
13410 case UNORDERED: /* PF=1 */
13411 case UNEQ: /* EQ - ZF=1 */
13412 case UNLT: /* LTU - CF=1 */
13413 case UNLE: /* LEU - CF=1 | ZF=1 */
13414 case LTGT: /* EQ - ZF=0 */
13416 case LT: /* LTU - CF=1 - fails on unordered */
13417 *first_code = UNLT;
13418 *bypass_code = UNORDERED;
13420 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13421 *first_code = UNLE;
13422 *bypass_code = UNORDERED;
13424 case EQ: /* EQ - ZF=1 - fails on unordered */
13425 *first_code = UNEQ;
13426 *bypass_code = UNORDERED;
13428 case NE: /* NE - ZF=0 - fails on unordered */
13429 *first_code = LTGT;
13430 *second_code = UNORDERED;
13432 case UNGE: /* GEU - CF=0 - fails on unordered */
13434 *second_code = UNORDERED;
13436 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13438 *second_code = UNORDERED;
13441 gcc_unreachable ();
13443 if (!TARGET_IEEE_FP)
13445 *second_code = UNKNOWN;
13446 *bypass_code = UNKNOWN;
13450 /* Return cost of comparison done fcom + arithmetics operations on AX.
13451 All following functions do use number of instructions as a cost metrics.
13452 In future this should be tweaked to compute bytes for optimize_size and
13453 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code cost table (switch body) is elided in
   this excerpt; only the !TARGET_IEEE_FP fast path and the
   unreachable default are visible.  */
13455 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13457 if (!TARGET_IEEE_FP)
13459 /* The cost of code output by ix86_expand_fp_compare. */
13483 gcc_unreachable ();
13487 /* Return cost of comparison done using fcomi operation.
13488 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Base cost 2 (compare + branch), plus 1 if a bypass or second branch
   is needed; the elided guard returns a huge cost without fcomi.  */
13490 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13492 enum rtx_code bypass_code, first_code, second_code;
13493 /* Return arbitrarily high cost when instruction is not supported - this
13494 prevents gcc from using it. */
13497 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13498 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13501 /* Return cost of comparison done using sahf operation.
13502 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Base cost 3 (fnstsw + sahf + branch), plus 1 for an extra branch.  */
13504 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13506 enum rtx_code bypass_code, first_code, second_code;
13507 /* Return arbitrarily high cost when instruction is not preferred - this
13508 avoids gcc from using it. */
13509 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13511 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13512 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13515 /* Compute cost of the comparison done using any method.
13516 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Returns the minimum of the arithmetics, sahf and fcomi strategies
   (the min-updates between the ifs are elided in this excerpt).  */
13518 ix86_fp_comparison_cost (enum rtx_code code)
13520 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13523 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13524 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13526 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13527 if (min > sahf_cost)
13529 if (min > fcomi_cost)
13534 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE or for its
   swapped form (the caller may commute the operands).  */
13538 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13540 enum rtx_code swapped_code = swap_condition (code);
13542 return ((ix86_fp_comparison_cost (code)
13543 == ix86_fp_comparison_fcomi_cost (code))
13544 || (ix86_fp_comparison_cost (swapped_code)
13545 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13548 /* Swap, force into registers, or otherwise massage the two operands
13549 to a fp comparison. The operands are updated in place; the new
13550 comparison code is returned. */
/* NOTE(review): excerpt elides several conditions and the writes back
   through pop0/pop1; code kept verbatim.  */
13552 static enum rtx_code
13553 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13555 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13556 rtx op0 = *pop0, op1 = *pop1;
13557 enum machine_mode op_mode = GET_MODE (op0);
13558 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13560 /* All of the unordered compare instructions only work on registers.
13561 The same is true of the fcomi compare instructions. The XFmode
13562 compare instructions require registers except when comparing
13563 against zero or when converting operand 1 from fixed point to
13567 && (fpcmp_mode == CCFPUmode
13568 || (op_mode == XFmode
13569 && ! (standard_80387_constant_p (op0) == 1
13570 || standard_80387_constant_p (op1) == 1)
13571 && GET_CODE (op1) != FLOAT)
13572 || ix86_use_fcomi_compare (code)))
13574 op0 = force_reg (op_mode, op0);
13575 op1 = force_reg (op_mode, op1);
13579 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13580 things around if they appear profitable, otherwise force op0
13581 into a register. */
13583 if (standard_80387_constant_p (op0) == 0
13585 && ! (standard_80387_constant_p (op1) == 0
13589 tmp = op0, op0 = op1, op1 = tmp;
13590 code = swap_condition (code);
13594 op0 = force_reg (op_mode, op0);
13596 if (CONSTANT_P (op1))
/* Non-standard x87 constants (not 0.0/1.0 etc.) must live in memory;
   standard ones can stay and be loaded by fld1/fldz.  */
13598 int tmp = standard_80387_constant_p (op1);
13600 op1 = validize_mem (force_const_mem (op_mode, op1));
13604 op1 = force_reg (op_mode, op1);
13607 op1 = force_reg (op_mode, op1);
13611 /* Try to rearrange the comparison to make it cheaper. */
13612 if (ix86_fp_comparison_cost (code)
13613 > ix86_fp_comparison_cost (swap_condition (code))
13614 && (REG_P (op1) || can_create_pseudo_p ()))
13617 tmp = op0, op0 = op1, op1 = tmp;
13618 code = swap_condition (code);
13620 op0 = force_reg (op_mode, op0);
13628 /* Convert comparison codes we use to represent FP comparison to integer
13629 code that will result in proper branch. Return UNKNOWN if no such code
/* Body (the code-mapping switch) is elided in this excerpt.  */
13633 ix86_fp_compare_code_to_integer (enum rtx_code code)
13662 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): excerpt elides many lines (branch structure, case
   labels, returns); code kept verbatim.  Two strategies: (1) a direct
   fcomi/sahf flags compare when cheaper and the caller can consume
   the extra bypass/second tests; (2) fnstsw into SCRATCH followed by
   bit tests on AH to synthesize the condition, with special handling
   for NaNs under TARGET_IEEE_FP.  */
13665 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13666 rtx *second_test, rtx *bypass_test)
13668 enum machine_mode fpcmp_mode, intcmp_mode;
13670 int cost = ix86_fp_comparison_cost (code);
13671 enum rtx_code bypass_code, first_code, second_code;
13673 fpcmp_mode = ix86_fp_compare_mode (code);
13674 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13677 *second_test = NULL_RTX;
13679 *bypass_test = NULL_RTX;
13681 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13683 /* Do fcomi/sahf based test when profitable. */
13684 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13685 && (bypass_code == UNKNOWN || bypass_test)
13686 && (second_code == UNKNOWN || second_test)
13688 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13689 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13695 gcc_assert (TARGET_SAHF);
13698 scratch = gen_reg_rtx (HImode);
13699 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13701 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13704 /* The FP codes work out to act like unsigned. */
13705 intcmp_mode = fpcmp_mode;
13707 if (bypass_code != UNKNOWN)
13708 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13709 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13711 if (second_code != UNKNOWN)
13712 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13713 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13718 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13719 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13720 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13722 scratch = gen_reg_rtx (HImode);
13723 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13725 /* In the unordered case, we have to check C2 for NaN's, which
13726 doesn't happen to work out to anything nice combination-wise.
13727 So do some bit twiddling on the value we've got in AH to come
13728 up with an appropriate set of condition codes. */
13730 intcmp_mode = CCNOmode;
13735 if (code == GT || !TARGET_IEEE_FP)
13737 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13742 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13743 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13744 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13745 intcmp_mode = CCmode;
13751 if (code == LT && TARGET_IEEE_FP)
13753 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13754 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13755 intcmp_mode = CCmode;
13760 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13766 if (code == GE || !TARGET_IEEE_FP)
13768 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13773 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13774 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13781 if (code == LE && TARGET_IEEE_FP)
13783 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13784 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13785 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13786 intcmp_mode = CCmode;
13791 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13797 if (code == EQ && TARGET_IEEE_FP)
13799 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13800 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13801 intcmp_mode = CCmode;
13806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13813 if (code == NE && TARGET_IEEE_FP)
13815 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13816 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13822 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13832 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13837 gcc_unreachable ();
13841 /* Return the test that should be put into the flags user, i.e.
13842 the bcc, scc, or cmov instruction. */
13843 return gen_rtx_fmt_ee (code, VOIDmode,
13844 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the implicit operands ix86_compare_op0/op1 under
   CODE and return the RTX to put into the flags user (bcc/scc/cmov).
   *SECOND_TEST / *BYPASS_TEST receive auxiliary tests (or NULL_RTX) for FP
   comparisons that need more than one flags test.
   NOTE(review): this listing is elided -- gaps in the embedded original
   line numbers (13849, 13852, ...) mark dropped lines; braces and some
   statements are missing.  */
13849 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13852   op0 = ix86_compare_op0;
13853   op1 = ix86_compare_op1;
/* Default the auxiliary tests to "none"; FP expansion below may set them.  */
13856     *second_test = NULL_RTX;
13858     *bypass_test = NULL_RTX;
/* If a compare insn was already emitted, just wrap it -- do not emit again.  */
13860   if (ix86_compare_emitted)
13862       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13863       ix86_compare_emitted = NULL_RTX;
13865   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal FP is handled elsewhere; only binary FP reaches this path.  */
13867       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13868       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13869 				    second_test, bypass_test);
13872     ret = ix86_expand_int_compare (code, op0, op1);
13877 /* Return true if the CODE will result in nontrivial jump sequence.  */
/* "Nontrivial" means the FP comparison needs a bypass and/or a second
   flags test in addition to the primary one, i.e. more than one jump.
   NOTE(review): listing elided (return type line and braces missing).  */
13879 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13881   enum rtx_code bypass_code, first_code, second_code;
13884   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13885   return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE (comparing the implicit
   ix86_compare_op0/op1) to LABEL.  Dispatches on the comparison mode:
   narrow integer modes get a single compare+jump, FP modes go through
   ix86_split_fp_branch or a delayed compound insn, and double-word
   (DImode / TImode on 64-bit) compares are split into word-sized
   compare+branch sequences.
   NOTE(review): this listing is elided -- the switch arms, braces, and
   several statements between the embedded line numbers are missing.  */
13889 ix86_expand_branch (enum rtx_code code, rtx label)
13893   /* If we have emitted a compare insn, go straight to simple.
13894      ix86_expand_compare won't emit anything if ix86_compare_emitted
13896   if (ix86_compare_emitted)
13899   switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare and one conditional jump.  */
13905 	tmp = ix86_expand_compare (code, NULL, NULL);
13906 	tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13907 				    gen_rtx_LABEL_REF (VOIDmode, label),
13909 	emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case.  */
13918 	enum rtx_code bypass_code, first_code, second_code;
13920 	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13921 					     &ix86_compare_op1);
13923 	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13925 	/* Check whether we will use the natural sequence with one jump.  If
13926 	   so, we can expand jump early.  Otherwise delay expansion by
13927 	   creating compound insn to not confuse optimizers.  */
13928 	if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13930 	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13931 				  gen_rtx_LABEL_REF (VOIDmode, label),
13932 				  pc_rtx, NULL_RTX, NULL_RTX);
/* Delayed case: build a PARALLEL of the jump plus clobbers so later
   splitting can pick the best sequence.  */
13936 	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
13937 				  ix86_compare_op0, ix86_compare_op1);
13938 	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13939 					gen_rtx_LABEL_REF (VOIDmode, label),
13941 	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13943 	    use_fcomi = ix86_use_fcomi_compare (code);
/* Non-fcomi sequences need an extra HImode scratch (for fnstsw),
   hence 3 + !use_fcomi vector elements.  */
13944 	    vec = rtvec_alloc (3 + !use_fcomi);
13945 	    RTVEC_ELT (vec, 0) = tmp;
13947 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13949 	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13952 		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13954 	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13963       /* Expand DImode branch into multiple compare+branch.  */
13965 	rtx lo[2], hi[2], label2;
13966 	enum rtx_code code1, code2, code3;
13967 	enum machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
13969 	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13971 	    tmp = ix86_compare_op0;
13972 	    ix86_compare_op0 = ix86_compare_op1;
13973 	    ix86_compare_op1 = tmp;
13974 	    code = swap_condition (code);
13976 	if (GET_MODE (ix86_compare_op0) == DImode)
13978 	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13979 	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13984 	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13985 	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13989 	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13990 	   avoid two branches.  This costs one extra insn, so disable when
13991 	   optimizing for size.  */
13993 	if ((code == EQ || code == NE)
13994 	    && (!optimize_insn_for_size_p ()
13995 	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
14000 	    if (hi[1] != const0_rtx)
14001 	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14002 				   NULL_RTX, 0, OPTAB_WIDEN);
14005 	    if (lo[1] != const0_rtx)
14006 	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14007 				   NULL_RTX, 0, OPTAB_WIDEN);
14009 	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
14010 				NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: branch on the ORed word against zero.  */
14012 	    ix86_compare_op0 = tmp;
14013 	    ix86_compare_op1 = const0_rtx;
14014 	    ix86_expand_branch (code, label);
14018 	/* Otherwise, if we are doing less-than or greater-or-equal-than,
14019 	   op1 is a constant and the low word is zero, then we can just
14020 	   examine the high word.  Similarly for low word -1 and
14021 	   less-or-equal-than or greater-than.  */
14023 	if (CONST_INT_P (hi[1]))
14026 	    case LT: case LTU: case GE: case GEU:
14027 	      if (lo[1] == const0_rtx)
14029 		  ix86_compare_op0 = hi[0];
14030 		  ix86_compare_op1 = hi[1];
14031 		  ix86_expand_branch (code, label);
14035 	    case LE: case LEU: case GT: case GTU:
14036 	      if (lo[1] == constm1_rtx)
14038 		  ix86_compare_op0 = hi[0];
14039 		  ix86_compare_op1 = hi[1];
14040 		  ix86_expand_branch (code, label);
14048 	/* Otherwise, we need two or three jumps.  */
14050 	label2 = gen_label_rtx ();
14053 	code2 = swap_condition (code);
14054 	code3 = unsigned_condition (code);
14058 	  case LT: case GT: case LTU: case GTU:
14061 	  case LE:   code1 = LT;  code2 = GT;  break;
14062 	  case GE:   code1 = GT;  code2 = LT;  break;
14063 	  case LEU:  code1 = LTU; code2 = GTU; break;
14064 	  case GEU:  code1 = GTU; code2 = LTU; break;
14066 	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
14067 	  case NE:   code2 = UNKNOWN; break;
14070 	    gcc_unreachable ();
14075 	 * if (hi(a) < hi(b)) goto true;
14076 	 * if (hi(a) > hi(b)) goto false;
14077 	 * if (lo(a) < lo(b)) goto true;
/* Compare high words first, then low words (unsigned, code3).  */
14081 	ix86_compare_op0 = hi[0];
14082 	ix86_compare_op1 = hi[1];
14084 	if (code1 != UNKNOWN)
14085 	  ix86_expand_branch (code1, label);
14086 	if (code2 != UNKNOWN)
14087 	  ix86_expand_branch (code2, label2);
14089 	ix86_compare_op0 = lo[0];
14090 	ix86_compare_op1 = lo[1];
14091 	ix86_expand_branch (code3, label);
14093 	if (code2 != UNKNOWN)
14094 	  emit_label (label2);
14099       gcc_unreachable ();
14103 /* Split branch based on floating point condition.  */
/* Emit one to three conditional jumps implementing an FP branch:
   an optional BYPASS jump over the main test, the main CONDITION jump
   to TARGET1/TARGET2, and an optional SECOND jump.  Branch-probability
   notes (REG_BR_PROB) are attached when split_branch_probability is set.
   PUSHED, if non-null, is an operand spilled to the stack that is freed
   after the compare.  NOTE(review): listing elided -- braces and several
   argument lines between the embedded numbers are missing.  */
14105 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14106 		      rtx target1, rtx target2, rtx tmp, rtx pushed)
14108   rtx second, bypass;
14109   rtx label = NULL_RTX;
14111   int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is target2 == pc_rtx.  */
14114   if (target2 != pc_rtx)
14117       code = reverse_condition_maybe_unordered (code);
14122   condition = ix86_expand_fp_compare (code, op1, op2,
14123 				      tmp, &second, &bypass);
14125   /* Remove pushed operand from stack.  */
14127     ix86_free_from_memory (GET_MODE (pushed));
14129   if (split_branch_probability >= 0)
14131       /* Distribute the probabilities across the jumps.
14132 	 Assume the BYPASS and SECOND to be always test
14134       probability = split_branch_probability;
14136       /* Value of 1 is low enough to make no need for probability
14137 	 to be updated.  Later we may run some experiments and see
14138 	 if unordered values are more frequent in practice.  */
14140 	bypass_probability = 1;
14142 	second_probability = 1;
/* Bypass jump: skip the main test for unordered operands.  */
14144   if (bypass != NULL_RTX)
14146       label = gen_label_rtx ();
14147       i = emit_jump_insn (gen_rtx_SET
14149 			   gen_rtx_IF_THEN_ELSE (VOIDmode,
14151 						 gen_rtx_LABEL_REF (VOIDmode,
14154       if (bypass_probability >= 0)
14156 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
14157 			       GEN_INT (bypass_probability),
/* Main conditional jump.  */
14160   i = emit_jump_insn (gen_rtx_SET
14162 		       gen_rtx_IF_THEN_ELSE (VOIDmode,
14163 					     condition, target1, target2)));
14164   if (probability >= 0)
14166       = gen_rtx_EXPR_LIST (REG_BR_PROB,
14167 			   GEN_INT (probability),
/* Secondary jump for conditions needing two flags tests.  */
14169   if (second != NULL_RTX)
14171       i = emit_jump_insn (gen_rtx_SET
14173 			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14175       if (second_probability >= 0)
14177 	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
14178 			       GEN_INT (second_probability),
14181   if (label != NULL_RTX)
14182     emit_label (label);
/* Expand a setcc of CODE (on ix86_compare_op0/op1) into the QImode DEST.
   Returns 1 on success ("DONE"), 0 on failure ("FAIL") -- double-word
   modes are rejected.  Auxiliary FP tests are combined into the result
   with AND (bypass) or IOR (second).  NOTE(review): listing elided;
   return type, braces and some statements are missing.  */
14186 ix86_expand_setcc (enum rtx_code code, rtx dest)
14188   rtx ret, tmp, tmpreg, equiv;
14189   rtx second_test, bypass_test;
/* Double-word compares take a special path; punt here.  */
14191   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14192     return 0; /* FAIL */
14194   gcc_assert (GET_MODE (dest) == QImode);
14196   ret = ix86_expand_compare (code, &second_test, &bypass_test);
14197   PUT_MODE (ret, QImode);
14202   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14203   if (bypass_test || second_test)
14205       rtx test = second_test;
14207       rtx tmp2 = gen_reg_rtx (QImode);
/* bypass and second are mutually exclusive here.  */
14210 	  gcc_assert (!second_test);
14211 	  test = bypass_test;
14213 	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14215       PUT_MODE (test, QImode);
14216       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
/* Combine the auxiliary flag with the primary one.  */
14219 	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14221 	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14224   /* Attach a REG_EQUAL note describing the comparison result.  */
14225   if (ix86_compare_op0 && ix86_compare_op1)
14227       equiv = simplify_gen_relational (code, QImode,
14228 				       GET_MODE (ix86_compare_op0),
14229 				       ix86_compare_op0, ix86_compare_op1);
14230       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14233   return 1; /* DONE */
14236 /* Expand comparison setting or clearing carry flag.  Return true when
14237    successful and set pop for the operation.  */
/* Rewrites CODE/op0/op1 so the test becomes a pure carry-flag compare
   (LTU or GEU), enabling sbb/adc-style conditional-move sequences.
   NOTE(review): listing elided -- the integer-path switch structure and
   braces are missing between the embedded line numbers.  */
14239 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14241   enum machine_mode mode =
14242     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14244   /* Do not handle DImode compares that go through special path.  */
14245   if (mode == (TARGET_64BIT ? TImode : DImode))
14248   if (SCALAR_FLOAT_MODE_P (mode))
14250       rtx second_test = NULL, bypass_test = NULL;
14251       rtx compare_op, compare_seq;
14253       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14255       /* Shortcut:  following common codes never translate
14256 	 into carry flag compares.  */
14257       if (code == EQ || code == NE || code == UNEQ || code == LTGT
14258 	  || code == ORDERED || code == UNORDERED)
14261       /* These comparisons require zero flag; swap operands so they won't.  */
14262       if ((code == GT || code == UNLE || code == LE || code == UNGT)
14263 	  && !TARGET_IEEE_FP)
14268 	  code = swap_condition (code);
14271       /* Try to expand the comparison and verify that we end up with
14272 	 carry flag based comparison.  This fails to be true only when
14273 	 we decide to expand comparison using arithmetic that is not
14274 	 too common scenario.  */
14276       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14277 					   &second_test, &bypass_test);
14278       compare_seq = get_insns ();
/* Multi-test sequences can't be reduced to a single carry compare.  */
14281       if (second_test || bypass_test)
14284       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14285           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14286 	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14288 	code = GET_CODE (compare_op);
14290       if (code != LTU && code != GEU)
14293       emit_insn (compare_seq);
14298   if (!INTEGRAL_MODE_P (mode))
14307       /* Convert a==0 into (unsigned)a<1.  */
14310       if (op1 != const0_rtx)
14313       code = (code == EQ ? LTU : GEU);
14316       /* Convert a>b into b<a or a>=b-1.  */
14319       if (CONST_INT_P (op1))
14321 	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14322 	  /* Bail out on overflow.  We still can swap operands but that
14323 	     would force loading of the constant into register.  */
14324 	  if (op1 == const0_rtx
14325 	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14327 	  code = (code == GTU ? GEU : LTU);
14334 	  code = (code == GTU ? LTU : GEU);
14338       /* Convert a>=0 into (unsigned)a<0x80000000.  */
14341       if (mode == DImode || op1 != const0_rtx)
14343       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14344       code = (code == LT ? GEU : LTU);
14348       if (mode == DImode || op1 != constm1_rtx)
14350       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14351       code = (code == LE ? GEU : LTU);
14357   /* Swapping operands may cause constant to appear as first operand.  */
14358   if (!nonimmediate_operand (op0, VOIDmode))
14360       if (!can_create_pseudo_p ())
14362       op0 = force_reg (mode, op0);
/* Hand the canonicalized compare back through the implicit operands.  */
14364   ix86_compare_op0 = op0;
14365   ix86_compare_op1 = op1;
14366   *pop = ix86_expand_compare (code, NULL, NULL);
14367   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](cond)
   ? operands[2] : operands[3].  Returns 1 ("DONE") when a branchless or
   cmov sequence was emitted, 0 ("FAIL") to fall back to a jump sequence.
   Strategies visible below: sbb/carry-flag tricks for constant arms,
   lea-based arithmetic for small constant differences, setcc+and/plus
   sequences when cmov is unavailable, and a plain cmov otherwise.
   NOTE(review): listing heavily elided -- many branches, braces and
   operand lines between the embedded line numbers are missing.  */
14372 ix86_expand_int_movcc (rtx operands[])
14374   enum rtx_code code = GET_CODE (operands[1]), compare_code;
14375   rtx compare_seq, compare_op;
14376   rtx second_test, bypass_test;
14377   enum machine_mode mode = GET_MODE (operands[0]);
14378   bool sign_bit_compare_p = false;;
14381   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14382   compare_seq = get_insns ();
14385   compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and x>-1 / x<=-1) can be done with a sign-bit shift.  */
14387   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14388       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14389     sign_bit_compare_p = true;
14391   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14392      HImode insns, we'd be swallowed in word prefix ops.  */
14394   if ((mode != HImode || TARGET_FAST_PREFIX)
14395       && (mode != (TARGET_64BIT ? TImode : DImode))
14396       && CONST_INT_P (operands[2])
14397       && CONST_INT_P (operands[3]))
14399       rtx out = operands[0];
14400       HOST_WIDE_INT ct = INTVAL (operands[2]);
14401       HOST_WIDE_INT cf = INTVAL (operands[3]);
14402       HOST_WIDE_INT diff;
14405       /*  Sign bit compares are better done using shifts than we do by using
14407       if (sign_bit_compare_p
14408 	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14409 					     ix86_compare_op1, &compare_op))
14411 	  /* Detect overlap between destination and compare sources.  */
14414 	  if (!sign_bit_compare_p)
14416 	      bool fpcmp = false;
14418 	      compare_code = GET_CODE (compare_op);
14420 	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14421 		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14424 		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
14427 	      /* To simplify rest of code, restrict to the GEU case.  */
14428 	      if (compare_code == LTU)
14430 		  HOST_WIDE_INT tmp = ct;
14433 		  compare_code = reverse_condition (compare_code);
14434 		  code = reverse_condition (code);
14439 		    PUT_CODE (compare_op,
14440 			      reverse_condition_maybe_unordered
14441 			        (GET_CODE (compare_op)));
14443 		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14447 	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14448 		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
14449 		tmp = gen_reg_rtx (mode);
/* sbb-style: materialize 0/-1 from the carry flag.  */
14451 	      if (mode == DImode)
14452 		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14454 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14458 	      if (code == GT || code == GE)
14459 		code = reverse_condition (code);
14462 		  HOST_WIDE_INT tmp = ct;
14467 	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14468 				     ix86_compare_op1, VOIDmode, 0, -1);
14481 	      tmp = expand_simple_binop (mode, PLUS,
14483 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
14494 	      tmp = expand_simple_binop (mode, IOR,
14496 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
14498 	  else if (diff == -1 && ct)
14508 	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14510 		tmp = expand_simple_binop (mode, PLUS,
14511 					   copy_rtx (tmp), GEN_INT (cf),
14512 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
14520 	       *	 andl cf - ct, dest
14530 		tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14533 	      tmp = expand_simple_binop (mode, AND,
14535 					 gen_int_mode (cf - ct, mode),
14536 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
14538 		tmp = expand_simple_binop (mode, PLUS,
14539 					   copy_rtx (tmp), GEN_INT (ct),
14540 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
14543 	  if (!rtx_equal_p (tmp, out))
14544 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14546 	  return 1; /* DONE */
14551 	  enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
/* Swap the constant arms so diff becomes positive (reversing CODE).  */
14554 	  tmp = ct, ct = cf, cf = tmp;
14557 	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
14559 	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14561 	      /* We may be reversing unordered compare to normal compare, that
14562 		 is not valid in general (we may convert non-trapping condition
14563 		 to trapping one), however on i386 we currently emit all
14564 		 comparisons unordered.  */
14565 	      compare_code = reverse_condition_maybe_unordered (compare_code);
14566 	      code = reverse_condition_maybe_unordered (code);
14570 	      compare_code = reverse_condition (compare_code);
14571 	      code = reverse_condition (code);
14575       compare_code = UNKNOWN;
14576       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14577 	  && CONST_INT_P (ix86_compare_op1))
14579 	  if (ix86_compare_op1 == const0_rtx
14580 	      && (code == LT || code == GE))
14581 	    compare_code = code;
14582 	  else if (ix86_compare_op1 == constm1_rtx)
14586 	      else if (code == GT)
14591       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
14592       if (compare_code != UNKNOWN
14593 	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14594 	  && (cf == -1 || ct == -1))
14596 	  /* If lea code below could be used, only optimize
14597 	     if it results in a 2 insn sequence.  */
14599 	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14600 		 || diff == 3 || diff == 5 || diff == 9)
14601 	      || (compare_code == LT && ct == -1)
14602 	      || (compare_code == GE && cf == -1))
14605 	       * notl op1	(if necessary)
14613 		  code = reverse_condition (code);
14616 	      out = emit_store_flag (out, code, ix86_compare_op0,
14617 				     ix86_compare_op1, VOIDmode, 0, -1);
14619 	      out = expand_simple_binop (mode, IOR,
14621 					 out, 1, OPTAB_DIRECT);
14622 	      if (out != operands[0])
14623 		emit_move_insn (operands[0], out);
14625 	      return 1; /* DONE */
/* lea path: dest = cf + setcc * diff for scaled/indexed diffs.  */
14630       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14631 	   || diff == 3 || diff == 5 || diff == 9)
14632 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14634 	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14640 	   *			lea cf(dest*(ct-cf)),dest
14644 	   * This also catches the degenerate setcc-only case.
14650 	  out = emit_store_flag (out, code, ix86_compare_op0,
14651 				 ix86_compare_op1, VOIDmode, 0, 1);
14654 	  /* On x86_64 the lea instruction operates on Pmode, so we need
14655 	     to get arithmetics done in proper mode to match.  */
14657 	    tmp = copy_rtx (out);
14661 	      out1 = copy_rtx (out);
14662 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14666 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
14672 	    tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14675 	  if (!rtx_equal_p (tmp, out))
14678 	      out = force_operand (tmp, copy_rtx (out));
14680 	      emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14682 	  if (!rtx_equal_p (out, operands[0]))
14683 	    emit_move_insn (operands[0], copy_rtx (out));
14685 	  return 1; /* DONE */
14689        * General case:			Jumpful:
14690        *   xorl dest,dest		cmpl op1, op2
14691        *   cmpl op1, op2		movl ct, dest
14692        *   setcc dest			jcc 1f
14693        *   decl dest			movl cf, dest
14694        *   andl (cf-ct),dest		1:
14697        * Size 20.			Size 14.
14699        * This is reasonably steep, but branch mispredict costs are
14700        * high on modern cpus, so consider failing only if optimizing
14704       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14705 	  && BRANCH_COST (optimize_insn_for_speed_p (),
14710 	      enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14715 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
14717 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14719 		  /* We may be reversing unordered compare to normal compare,
14720 		     that is not valid in general (we may convert non-trapping
14721 		     condition to trapping one), however on i386 we currently
14722 		     emit all comparisons unordered.  */
14723 		  code = reverse_condition_maybe_unordered (code);
14727 		  code = reverse_condition (code);
14728 		  if (compare_code != UNKNOWN)
14729 		    compare_code = reverse_condition (compare_code);
14733 	  if (compare_code != UNKNOWN)
14735 	      /* notl op1	(if needed)
14740 		 For x < 0 (resp. x <= -1) there will be no notl,
14741 		 so if possible swap the constants to get rid of the
14743 		 True/false will be -1/0 while code below (store flag
14744 		 followed by decrement) is 0/-1, so the constants need
14745 		 to be exchanged once more.  */
14747 	      if (compare_code == GE || !cf)
14749 		  code = reverse_condition (code);
14754 		  HOST_WIDE_INT tmp = cf;
14759 	      out = emit_store_flag (out, code, ix86_compare_op0,
14760 				     ix86_compare_op1, VOIDmode, 0, -1);
14764 	      out = emit_store_flag (out, code, ix86_compare_op0,
14765 				     ix86_compare_op1, VOIDmode, 0, 1);
14767 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14768 					 copy_rtx (out), 1, OPTAB_DIRECT);
14771 	    out = expand_simple_binop (mode, AND, copy_rtx (out),
14772 				       gen_int_mode (cf - ct, mode),
14773 				       copy_rtx (out), 1, OPTAB_DIRECT);
14775 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14776 				       copy_rtx (out), 1, OPTAB_DIRECT);
14777 	  if (!rtx_equal_p (out, operands[0]))
14778 	    emit_move_insn (operands[0], copy_rtx (out));
14780 	  return 1; /* DONE */
14784   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14786       /* Try a few things more with specific constants and a variable.  */
14789       rtx var, orig_out, out, tmp;
14791       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14792 	return 0; /* FAIL */
14794       /* If one of the two operands is an interesting constant, load a
14795 	 constant with the above and mask it in with a logical operation.  */
14797       if (CONST_INT_P (operands[2]))
14800 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14801 	    operands[3] = constm1_rtx, op = and_optab;
14802 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14803 	    operands[3] = const0_rtx, op = ior_optab;
14805 	    return 0; /* FAIL */
14807       else if (CONST_INT_P (operands[3]))
14810 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14811 	    operands[2] = constm1_rtx, op = and_optab;
14812 	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14813 	    operands[2] = const0_rtx, op = ior_optab;
14815 	    return 0; /* FAIL */
14818 	return 0; /* FAIL */
14820       orig_out = operands[0];
14821       tmp = gen_reg_rtx (mode);
14824       /* Recurse to get the constant loaded.  */
14825       if (ix86_expand_int_movcc (operands) == 0)
14826 	return 0; /* FAIL */
14828       /* Mask in the interesting variable.  */
14829       out = expand_binop (mode, op, var, tmp, orig_out, 0,
14831       if (!rtx_equal_p (out, orig_out))
14832 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14834       return 1; /* DONE */
14838    * For comparison with above,
/* Plain cmov path: force operands into registers as required.  */
14848   if (! nonimmediate_operand (operands[2], mode))
14849     operands[2] = force_reg (mode, operands[2]);
14850   if (! nonimmediate_operand (operands[3], mode))
14851     operands[3] = force_reg (mode, operands[3]);
14853   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14855       rtx tmp = gen_reg_rtx (mode);
14856       emit_move_insn (tmp, operands[3]);
14859   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14861       rtx tmp = gen_reg_rtx (mode);
14862       emit_move_insn (tmp, operands[2]);
14866       if (! register_operand (operands[2], VOIDmode)
14868 	      || ! register_operand (operands[3], VOIDmode)))
14869 	operands[2] = force_reg (mode, operands[2]);
14872 	  && ! register_operand (operands[3], VOIDmode))
14873 	operands[3] = force_reg (mode, operands[3]);
14875   emit_insn (compare_seq);
14876   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14877 			  gen_rtx_IF_THEN_ELSE (mode,
14878 						compare_op, operands[2],
/* Extra cmovs for the auxiliary bypass / second FP flags tests.  */
14881     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14882 			    gen_rtx_IF_THEN_ELSE (mode,
14884 						  copy_rtx (operands[3]),
14885 						  copy_rtx (operands[0]))));
14887     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14888 			    gen_rtx_IF_THEN_ELSE (mode,
14890 						  copy_rtx (operands[2]),
14891 						  copy_rtx (operands[0]))));
14893   return 1; /* DONE */
14896 /* Swap, force into registers, or otherwise massage the two operands
14897 to an sse comparison with a mask result. Thus we differ a bit from
14898 ix86_prepare_fp_compare_args which expects to produce a flags result.
14900 The DEST operand exists to help determine whether to commute commutative
14901 operators. The POP0/POP1 operands are updated in place. The new
14902 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): listing elided -- the switch statement and case labels
   between the embedded line numbers are missing.  */
14904 static enum rtx_code
14905 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14906 				  rtx *pop0, rtx *pop1)
14914       /* We have no LTGT as an operator.  We could implement it with
14915 	 NE & ORDERED, but this requires an extra temporary.  It's
14916 	 not clear that it's worth it.  */
14923       /* These are supported directly.  */
14930       /* For commutative operators, try to canonicalize the destination
14931 	 operand to be first in the comparison - this helps reload to
14932 	 avoid extra moves.  */
14933       if (!dest || !rtx_equal_p (dest, *pop1))
14941       /* These are not supported directly.  Swap the comparison operands
14942 	 to transform into something that is supported.  */
/* Swapping GT<->LT etc. maps the unsupported codes onto supported ones.  */
14946       code = swap_condition (code);
14950       gcc_unreachable ();
14956 /* Detect conditional moves that exactly match min/max operational
14957    semantics.  Note that this is IEEE safe, as long as we don't
14958    interchange the operands.
14960    Returns FALSE if this conditional move doesn't match a MIN/MAX,
14961    and TRUE if the operation is successful and instructions are emitted.  */
/* NOTE(review): listing elided -- return type, braces and several
   statements (including where is_min is set) are missing.  */
14964 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14965 			   rtx cmp_op1, rtx if_true, rtx if_false)
14967   enum machine_mode mode;
14973   else if (code == UNGE)
14976       if_true = if_false;
/* The cmov matches min/max only if its arms are exactly the compared
   operands (in either order).  */
14982   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14984   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14989   mode = GET_MODE (dest);
14991   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14992      but MODE may be a vector mode and thus not appropriate.  */
14993   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-correct path: use the UNSPEC min/max patterns.  */
14995       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14998 	if_true = force_reg (mode, if_true);
14999       v = gen_rtvec (2, if_true, if_false);
15000       tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX.  */
15004       code = is_min ? SMIN : SMAX;
15005       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15008   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15012 /* Expand an sse vector comparison.  Return the register with the result.  */
/* Emits DEST = (CODE cmp_op0 cmp_op1); a fresh register is used when DEST
   overlaps the later cmov arms OP_TRUE/OP_FALSE.  NOTE(review): listing
   elided -- return type, braces and some condition lines missing.  */
15015 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15016 		     rtx op_true, rtx op_false)
15018   enum machine_mode mode = GET_MODE (dest);
15021   cmp_op0 = force_reg (mode, cmp_op0);
15022   if (!nonimmediate_operand (cmp_op1, mode))
15023     cmp_op1 = force_reg (mode, cmp_op1);
15026       || reg_overlap_mentioned_p (dest, op_true)
15027       || reg_overlap_mentioned_p (dest, op_false))
15028     dest = gen_reg_rtx (mode);
15030   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15031   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15036 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15037    operations.  This is used for both scalar and vector conditional moves.  */
/* CMP is assumed to be an all-ones/all-zeros mask (as produced by
   ix86_expand_sse_cmp); the blend is (cmp & true) | (~cmp & false),
   with single-AND shortcuts when one arm is zero, and a direct pcmov
   on SSE5.  NOTE(review): listing elided (braces, some lines missing).  */
15040 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15042   enum machine_mode mode = GET_MODE (dest);
/* false arm is zero: dest = cmp & true.  */
15045   if (op_false == CONST0_RTX (mode))
15047       op_true = force_reg (mode, op_true);
15048       x = gen_rtx_AND (mode, cmp, op_true);
15049       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* true arm is zero: dest = ~cmp & false.  */
15051   else if (op_true == CONST0_RTX (mode))
15053       op_false = force_reg (mode, op_false);
15054       x = gen_rtx_NOT (mode, cmp);
15055       x = gen_rtx_AND (mode, x, op_false);
15056       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15058   else if (TARGET_SSE5)
15060       rtx pcmov = gen_rtx_SET (mode, dest,
15061 			       gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General AND/ANDN/OR blend.  */
15068       op_true = force_reg (mode, op_true);
15069       op_false = force_reg (mode, op_false);
15071       t2 = gen_reg_rtx (mode);
15073 	t3 = gen_reg_rtx (mode);
15077       x = gen_rtx_AND (mode, op_true, cmp);
15078       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15080       x = gen_rtx_NOT (mode, cmp);
15081       x = gen_rtx_AND (mode, x, op_false);
15082       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15084       x = gen_rtx_IOR (mode, t3, t2);
15085       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15089 /* Expand a floating-point conditional move.  Return true if successful.  */
/* operands[0] = operands[1](cond) ? operands[2] : operands[3].
   SSE-math modes use the compare/minmax/blend helpers above; otherwise
   an x87 fcmov is emitted, with signed integer conditions rewritten
   through setcc.  NOTE(review): listing elided -- return statements,
   braces and a few argument lines are missing.  */
15092 ix86_expand_fp_movcc (rtx operands[])
15094   enum machine_mode mode = GET_MODE (operands[0]);
15095   enum rtx_code code = GET_CODE (operands[1]);
15096   rtx tmp, compare_op, second_test, bypass_test;
15098   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15100       enum machine_mode cmode;
15102       /* Since we've no cmove for sse registers, don't force bad register
15103 	 allocation just to gain access to it.  Deny movcc when the
15104 	 comparison mode doesn't match the move mode.  */
15105       cmode = GET_MODE (ix86_compare_op0);
15106       if (cmode == VOIDmode)
15107 	cmode = GET_MODE (ix86_compare_op1);
15111       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15113 					       &ix86_compare_op1);
15114       if (code == UNKNOWN)
/* Prefer a direct min/max when the cmov matches one.  */
15117       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15118 				     ix86_compare_op1, operands[2],
15122       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15123 				 ix86_compare_op1, operands[2], operands[3]);
15124       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15128   /* The floating point conditional move instructions don't directly
15129      support conditions resulting from a signed integer comparison.  */
15131   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15133   /* The floating point conditional move instructions don't directly
15134      support signed integer comparisons.  */
15136   if (!fcmov_comparison_operator (compare_op, VOIDmode))
15138       gcc_assert (!second_test && !bypass_test);
/* Materialize the condition with setcc, then compare that against 0.  */
15139       tmp = gen_reg_rtx (QImode);
15140       ix86_expand_setcc (code, tmp);
15142       ix86_compare_op0 = tmp;
15143       ix86_compare_op1 = const0_rtx;
15144       compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15146   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15148       tmp = gen_reg_rtx (mode);
15149       emit_move_insn (tmp, operands[3]);
15152   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15154       tmp = gen_reg_rtx (mode);
15155       emit_move_insn (tmp, operands[2]);
15159   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15160 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
15161 						operands[2], operands[3])));
/* Follow-up cmovs for the auxiliary bypass/second tests.  */
15163     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15164 			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15165 						  operands[3], operands[0])));
15167     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15168 			    gen_rtx_IF_THEN_ELSE (mode, second_test,
15169 						  operands[2], operands[0])));
15174 /* Expand a floating-point vector conditional move; a vcond operation
15175    rather than a movcc operation.  */
/* operands: 0 = dest, 1/2 = value arms, 3 = comparison code,
   4/5 = compare operands.  NOTE(review): listing elided (return type,
   braces, return statements missing).  */
15178 ix86_expand_fp_vcond (rtx operands[])
15180   enum rtx_code code = GET_CODE (operands[3]);
15183   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15184 					   &operands[4], &operands[5]);
15185   if (code == UNKNOWN)
15188   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15189 				 operands[5], operands[1], operands[2]))
15192   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15193 			     operands[1], operands[2]);
15194   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15198 /* Expand a signed/unsigned integral vector conditional move.  */
/* Canonicalizes the comparison to EQ/GT/GTU (possibly negating or
   swapping arms), rejects V2DImode codes the target ISA lacks, and
   rewrites unsigned compares via subtraction/sign-bit or saturating-
   subtraction tricks before blending with ix86_expand_sse_movcc.
   NOTE(review): listing heavily elided -- switch bodies, braces and
   several statements between the embedded line numbers are missing.  */
15201 ix86_expand_int_vcond (rtx operands[])
15203   enum machine_mode mode = GET_MODE (operands[0]);
15204   enum rtx_code code = GET_CODE (operands[3]);
15205   bool negate = false;
15208   cop0 = operands[4];
15209   cop1 = operands[5];
15211   /* SSE5 supports all of the comparisons on all vector int types.  */
15214       /* Canonicalize the comparison to EQ, GT, GTU.  */
15225 	  code = reverse_condition (code);
15231 	  code = reverse_condition (code);
15237 	  code = swap_condition (code);
15238 	  x = cop0, cop0 = cop1, cop1 = x;
15242 	  gcc_unreachable ();
15245       /* Only SSE4.1/SSE4.2 supports V2DImode.  */
15246       if (mode == V2DImode)
15251 	      /* SSE4.1 supports EQ.  */
15252 	      if (!TARGET_SSE4_1)
15258 	      /* SSE4.2 supports GT/GTU.  */
15259 	      if (!TARGET_SSE4_2)
15264 	      gcc_unreachable ();
15268       /* Unsigned parallel compare is not supported by the hardware.  Play some
15269 	 tricks to turn this into a signed comparison against 0.  */
15272 	  cop0 = force_reg (mode, cop0);
15281 	      /* Perform a parallel modulo subtraction.  */
15282 	      t1 = gen_reg_rtx (mode);
15283 	      emit_insn ((mode == V4SImode
15285 			  : gen_subv2di3) (t1, cop0, cop1));
15287 	      /* Extract the original sign bit of op0.  */
15288 	      mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15290 	      t2 = gen_reg_rtx (mode);
15291 	      emit_insn ((mode == V4SImode
15293 			  : gen_andv2di3) (t2, cop0, mask));
15295 	      /* XOR it back into the result of the subtraction.  This results
15296 		 in the sign bit set iff we saw unsigned underflow.  */
15297 	      x = gen_reg_rtx (mode);
15298 	      emit_insn ((mode == V4SImode
15300 			  : gen_xorv2di3) (x, t1, t2));
15308 	      /* Perform a parallel unsigned saturating subtraction.  */
15309 	      x = gen_reg_rtx (mode);
15310 	      emit_insn (gen_rtx_SET (VOIDmode, x,
15311 				      gen_rtx_US_MINUS (mode, cop0, cop1)));
15318 	      gcc_unreachable ();
15322 	  cop1 = CONST0_RTX (mode);
/* NEGATE, when set by canonicalization, swaps the blend arms.  */
15326   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15327 			   operands[1+negate], operands[2-negate]);
15329   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15330 			 operands[2-negate]);
15334 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15335 true if we should do zero extension, else sign extension. HIGH_P is
15336 true if we want the N/2 high elements, else the low elements. */
/* NOTE(review): the mode switch selecting UNPACK is elided in this
   listing; assignments below show the high/low interleave choices.  */
15339 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15341 enum machine_mode imode = GET_MODE (operands[1]);
15342 rtx (*unpack)(rtx, rtx, rtx);
15349 unpack = gen_vec_interleave_highv16qi;
15351 unpack = gen_vec_interleave_lowv16qi;
15355 unpack = gen_vec_interleave_highv8hi;
15357 unpack = gen_vec_interleave_lowv8hi;
15361 unpack = gen_vec_interleave_highv4si;
15363 unpack = gen_vec_interleave_lowv4si;
15366 gcc_unreachable ();
15369 dest = gen_lowpart (imode, operands[0]);
/* SE holds the second interleave operand: zero for zero-extension, or a
   sign mask (x < 0 compare) for sign-extension.  */
15372 se = force_reg (imode, CONST0_RTX (imode));
15374 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15375 operands[1], pc_rtx, pc_rtx);
15377 emit_insn (unpack (dest, operands[1], se));
15380 /* This function performs the same task as ix86_expand_sse_unpack,
15381 but with SSE4.1 instructions. */
/* NOTE(review): elided listing -- the mode switch and the high_p
   conditional are partially missing.  Uses pmovsx/pmovzx expanders.  */
15384 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15386 enum machine_mode imode = GET_MODE (operands[1]);
15387 rtx (*unpack)(rtx, rtx);
15394 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15396 unpack = gen_sse4_1_extendv8qiv8hi2;
15400 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15402 unpack = gen_sse4_1_extendv4hiv4si2;
15406 unpack = gen_sse4_1_zero_extendv2siv2di2;
15408 unpack = gen_sse4_1_extendv2siv2di2;
15411 gcc_unreachable ();
15414 dest = operands[0];
15417 /* Shift higher 8 bytes to lower 8 bytes. */
/* For high_p, move the upper half into position first via a TImode
   logical right shift, then extend from the low half.  */
15418 src = gen_reg_rtx (imode);
15419 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15420 gen_lowpart (TImode, operands[1]),
15426 emit_insn (unpack (dest, src));
15429 /* This function performs the same task as ix86_expand_sse_unpack,
15430 but with sse5 instructions. */
/* NOTE(review): elided listing -- the switch over IMODE and several
   control lines are missing.  Each visible case builds a 16-entry PPERM
   byte-selector vector plus a parallel for the sign/zero source.  */
15433 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15435 enum machine_mode imode = GET_MODE (operands[1]);
15436 int pperm_bytes[16];
15438 int h = (high_p) ? 8 : 0;
15441 rtvec v = rtvec_alloc (16);
15444 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: interleave each source byte with a zero or sign byte.  */
15449 vs = rtvec_alloc (8);
15450 h2 = (high_p) ? 8 : 0;
15451 for (i = 0; i < 8; i++)
15453 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15454 pperm_bytes[2*i+1] = ((unsigned_p)
15456 : PPERM_SIGN | PPERM_SRC2 | i | h);
15459 for (i = 0; i < 16; i++)
15460 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15462 for (i = 0; i < 8; i++)
15463 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15465 p = gen_rtx_PARALLEL (VOIDmode, vs);
15466 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15468 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15470 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes followed by two extension bytes.  */
15474 vs = rtvec_alloc (4);
15475 h2 = (high_p) ? 4 : 0;
15476 for (i = 0; i < 4; i++)
15478 sign_extend = ((unsigned_p)
15480 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15481 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15482 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15483 pperm_bytes[4*i+2] = sign_extend;
15484 pperm_bytes[4*i+3] = sign_extend;
15487 for (i = 0; i < 16; i++)
15488 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15490 for (i = 0; i < 4; i++)
15491 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15493 p = gen_rtx_PARALLEL (VOIDmode, vs);
15494 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15496 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15498 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes followed by four extension bytes.  */
15502 vs = rtvec_alloc (2);
15503 h2 = (high_p) ? 2 : 0;
15504 for (i = 0; i < 2; i++)
15506 sign_extend = ((unsigned_p)
15508 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15509 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15510 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15511 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15512 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15513 pperm_bytes[8*i+4] = sign_extend;
15514 pperm_bytes[8*i+5] = sign_extend;
15515 pperm_bytes[8*i+6] = sign_extend;
15516 pperm_bytes[8*i+7] = sign_extend;
15519 for (i = 0; i < 16; i++)
15520 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15522 for (i = 0; i < 2; i++)
15523 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15525 p = gen_rtx_PARALLEL (VOIDmode, vs);
15526 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15528 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15530 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15534 gcc_unreachable ();
15540 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15541 next narrower integer vector type */
/* NOTE(review): elided listing -- the switch over IMODE is missing; each
   visible case builds a PPERM selector taking the low bytes of each
   element from SRC1 (first half) and SRC2 (second half).  */
15543 ix86_expand_sse5_pack (rtx operands[3])
15545 enum machine_mode imode = GET_MODE (operands[0]);
15546 int pperm_bytes[16];
15548 rtvec v = rtvec_alloc (16);
15550 rtx op0 = operands[0];
15551 rtx op1 = operands[1];
15552 rtx op2 = operands[2];
/* V8HI -> V16QI: keep the even (low) byte of every halfword.  */
15557 for (i = 0; i < 8; i++)
15559 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15560 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15563 for (i = 0; i < 16; i++)
15564 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15566 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15567 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI -> V8HI: keep the low two bytes of every word.  */
15571 for (i = 0; i < 4; i++)
15573 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15574 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15575 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15576 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15579 for (i = 0; i < 16; i++)
15580 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15582 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15583 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI -> V4SI: keep the low four bytes of every doubleword.  */
15587 for (i = 0; i < 2; i++)
15589 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15590 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15591 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15592 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15593 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15594 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15595 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15596 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15599 for (i = 0; i < 16; i++)
15600 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15602 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15603 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15607 gcc_unreachable ();
15613 /* Expand conditional increment or decrement using adb/sbb instructions.
15614 The default case using setcc followed by the conditional move can be
15615 done by generic code. */
/* NOTE(review): elided listing -- early-exit returns and some condition
   lines are missing.  Only applies when operand 3 is +1 or -1.  */
15617 ix86_expand_int_addcc (rtx operands[])
15619 enum rtx_code code = GET_CODE (operands[1]);
15621 rtx val = const0_rtx;
15622 bool fpcmp = false;
15623 enum machine_mode mode = GET_MODE (operands[0]);
15625 if (operands[3] != const1_rtx
15626 && operands[3] != constm1_rtx)
/* The compare must be expressible as a carry-flag test.  */
15628 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15629 ix86_compare_op1, &compare_op))
15631 code = GET_CODE (compare_op);
15633 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15634 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15637 code = ix86_fp_compare_code_to_integer (code);
/* FP compares need unordered-aware reversal.  */
15644 PUT_CODE (compare_op,
15645 reverse_condition_maybe_unordered
15646 (GET_CODE (compare_op)));
15648 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15650 PUT_MODE (compare_op, mode);
15652 /* Construct either adc or sbb insn. */
15653 if ((code == LTU) == (operands[3] == constm1_rtx))
15655 switch (GET_MODE (operands[0]))
15658 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15661 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15664 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15667 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15670 gcc_unreachable ();
15675 switch (GET_MODE (operands[0]))
15678 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15681 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15684 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15687 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15690 gcc_unreachable ();
15693 return 1; /* DONE */
15697 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15698 works for floating pointer parameters and nonoffsetable memories.
15699 For pushes, it returns just stack offsets; the values will be saved
15700 in the right order. Maximally three parts are generated. */
/* NOTE(review): elided listing -- the TARGET_64BIT branch structure and
   returns are missing; comments describe the visible statements only.  */
15703 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15708 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15710 size = (GET_MODE_SIZE (mode) + 4) / 8;
15712 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15713 gcc_assert (size >= 2 && size <= 4);
15715 /* Optimize constant pool reference to immediates. This is used by fp
15716 moves, that force all constants to memory to allow combining. */
15717 if (MEM_P (operand) && MEM_READONLY_P (operand))
15719 rtx tmp = maybe_get_pool_constant (operand);
15724 if (MEM_P (operand) && !offsettable_memref_p (operand))
15726 /* The only non-offsetable memories we handle are pushes. */
15727 int ok = push_operand (operand, VOIDmode);
/* For pushes, all parts alias the same auto-modified address.  */
15731 operand = copy_rtx (operand);
15732 PUT_MODE (operand, Pmode);
15733 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15737 if (GET_CODE (operand) == CONST_VECTOR)
15739 enum machine_mode imode = int_mode_for_mode (mode);
15740 /* Caution: if we looked through a constant pool memory above,
15741 the operand may actually have a different mode now. That's
15742 ok, since we want to pun this all the way back to an integer. */
15743 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15744 gcc_assert (operand != NULL);
15750 if (mode == DImode)
15751 split_di (&operand, 1, &parts[0], &parts[1]);
15756 if (REG_P (operand))
15758 gcc_assert (reload_completed);
15759 for (i = 0; i < size; i++)
15760 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15762 else if (offsettable_memref_p (operand))
15764 operand = adjust_address (operand, SImode, 0);
15765 parts[0] = operand;
15766 for (i = 1; i < size; i++)
15767 parts[i] = adjust_address (operand, SImode, 4 * i);
15769 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Materialize FP constants as SImode immediates, per target layout.  */
15774 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15778 real_to_target (l, &r, mode);
15779 parts[3] = gen_int_mode (l[3], SImode);
15780 parts[2] = gen_int_mode (l[2], SImode);
15783 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15784 parts[2] = gen_int_mode (l[2], SImode);
15787 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15790 gcc_unreachable ();
15792 parts[1] = gen_int_mode (l[1], SImode);
15793 parts[0] = gen_int_mode (l[0], SImode);
15796 gcc_unreachable ();
/* 64-bit path: split into DImode (and SImode/DImode upper) parts.  */
15801 if (mode == TImode)
15802 split_ti (&operand, 1, &parts[0], &parts[1]);
15803 if (mode == XFmode || mode == TFmode)
15805 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15806 if (REG_P (operand))
15808 gcc_assert (reload_completed);
15809 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15810 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15812 else if (offsettable_memref_p (operand))
15814 operand = adjust_address (operand, DImode, 0);
15815 parts[0] = operand;
15816 parts[1] = adjust_address (operand, upper_mode, 8);
15818 else if (GET_CODE (operand) == CONST_DOUBLE)
15823 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15824 real_to_target (l, &r, mode);
15826 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15827 if (HOST_BITS_PER_WIDE_INT >= 64)
15830 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15831 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15834 parts[0] = immed_double_const (l[0], l[1], DImode);
15836 if (upper_mode == SImode)
15837 parts[1] = gen_int_mode (l[2], SImode);
15838 else if (HOST_BITS_PER_WIDE_INT >= 64)
15841 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15842 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15845 parts[1] = immed_double_const (l[2], l[3], DImode);
15848 gcc_unreachable ();
15855 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15856 Return false when normal moves are needed; true when all required
15857 insns have been emitted. Operands 2-4 contain the input values
15858 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): elided listing -- returns and several condition lines
   are missing; comments describe visible statements only.  */
15861 ix86_split_long_move (rtx operands[])
15866 int collisions = 0;
15867 enum machine_mode mode = GET_MODE (operands[0]);
15868 bool collisionparts[4];
15870 /* The DFmode expanders may ask us to move double.
15871 For 64bit target this is single move. By hiding the fact
15872 here we simplify i386.md splitters. */
15873 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15875 /* Optimize constant pool reference to immediates. This is used by
15876 fp moves, that force all constants to memory to allow combining. */
15878 if (MEM_P (operands[1])
15879 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15880 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15881 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15882 if (push_operand (operands[0], VOIDmode))
15884 operands[0] = copy_rtx (operands[0]);
15885 PUT_MODE (operands[0], Pmode);
15888 operands[0] = gen_lowpart (DImode, operands[0]);
15889 operands[1] = gen_lowpart (DImode, operands[1]);
15890 emit_move_insn (operands[0], operands[1]);
15894 /* The only non-offsettable memory we handle is push. */
15895 if (push_operand (operands[0], VOIDmode))
15898 gcc_assert (!MEM_P (operands[0])
15899 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-size parts.  */
15901 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15902 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15904 /* When emitting push, take care for source operands on the stack. */
15905 if (push && MEM_P (operands[1])
15906 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15907 for (i = 0; i < nparts - 1; i++)
15908 part[1][i] = change_address (part[1][i],
15909 GET_MODE (part[1][i]),
15910 XEXP (part[1][i + 1], 0));
15912 /* We need to do copy in the right order in case an address register
15913 of the source overlaps the destination. */
15914 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15918 for (i = 0; i < nparts; i++)
15921 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15922 if (collisionparts[i])
15926 /* Collision in the middle part can be handled by reordering. */
15927 if (collisions == 1 && nparts == 3 && collisionparts [1])
15929 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15930 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15932 else if (collisions == 1
15934 && (collisionparts [1] || collisionparts [2]))
15936 if (collisionparts [1])
15938 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15939 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15943 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15944 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15948 /* If there are more collisions, we can't handle it by reordering.
15949 Do an lea to the last part and use only one colliding move. */
15950 else if (collisions > 1)
15956 base = part[0][nparts - 1];
15958 /* Handle the case when the last part isn't valid for lea.
15959 Happens in 64-bit mode storing the 12-byte XFmode. */
15960 if (GET_MODE (base) != Pmode)
15961 base = gen_rtx_REG (Pmode, REGNO (base));
15963 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15964 part[1][0] = replace_equiv_address (part[1][0], base);
15965 for (i = 1; i < nparts; i++)
15967 tmp = plus_constant (base, UNITS_PER_WORD * i);
15968 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit parts from high to low (stack grows down).  */
15979 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15980 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15981 emit_move_insn (part[0][2], part[1][2]);
15983 else if (nparts == 4)
15985 emit_move_insn (part[0][3], part[1][3]);
15986 emit_move_insn (part[0][2], part[1][2]);
15991 /* In 64bit mode we don't have 32bit push available. In case this is
15992 register, it is OK - we will just use larger counterpart. We also
15993 retype memory - these comes from attempt to avoid REX prefix on
15994 moving of second half of TFmode value. */
15995 if (GET_MODE (part[1][1]) == SImode)
15997 switch (GET_CODE (part[1][1]))
16000 part[1][1] = adjust_address (part[1][1], DImode, 0);
16004 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16008 gcc_unreachable ();
16011 if (GET_MODE (part[1][0]) == SImode)
16012 part[1][0] = part[1][1];
16015 emit_move_insn (part[0][1], part[1][1]);
16016 emit_move_insn (part[0][0], part[1][0]);
16020 /* Choose correct order to not overwrite the source before it is copied. */
16021 if ((REG_P (part[0][0])
16022 && REG_P (part[1][1])
16023 && (REGNO (part[0][0]) == REGNO (part[1][1])
16025 && REGNO (part[0][0]) == REGNO (part[1][2]))
16027 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16029 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reverse order when the first destination would clobber a source.  */
16031 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16033 operands[2 + i] = part[0][j];
16034 operands[6 + i] = part[1][j];
16039 for (i = 0; i < nparts; i++)
16041 operands[2 + i] = part[0][i];
16042 operands[6 + i] = part[1][i];
16046 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16047 if (optimize_insn_for_size_p ())
16049 for (j = 0; j < nparts - 1; j++)
16050 if (CONST_INT_P (operands[6 + j])
16051 && operands[6 + j] != const0_rtx
16052 && REG_P (operands[2 + j]))
16053 for (i = j; i < nparts - 1; i++)
16054 if (CONST_INT_P (operands[7 + i])
16055 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16056 operands[7 + i] = operands[2 + j];
16059 for (i = 0; i < nparts; i++)
16060 emit_move_insn (operands[2 + i], operands[6 + i]);
16065 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16066 left shift by a constant, either using a single shift or
16067 a sequence of add instructions. */
/* NOTE(review): elided listing -- the count==1 guard line is missing.  */
16070 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16074 emit_insn ((mode == DImode
16076 : gen_adddi3) (operand, operand, operand));
/* Small counts: repeated x+x adds when cheaper than a shift.  */
16078 else if (!optimize_insn_for_size_p ()
16079 && count * ix86_cost->add <= ix86_cost->shift_const)
16082 for (i=0; i<count; i++)
16084 emit_insn ((mode == DImode
16086 : gen_adddi3) (operand, operand, operand));
16090 emit_insn ((mode == DImode
16092 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift into word-size operations.
   NOTE(review): elided listing -- braces and some conditions missing;
   comments describe visible statements only.  */
16096 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16098 rtx low[2], high[2];
16100 const int single_width = mode == DImode ? 32 : 64;
16102 if (CONST_INT_P (operands[2]))
16104 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16105 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by >= one word: low word becomes zero.  */
16107 if (count >= single_width)
16109 emit_move_insn (high[0], low[1]);
16110 emit_move_insn (low[0], const0_rtx);
16112 if (count > single_width)
16113 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Shift by < one word: shld plus a plain shift of the low word.  */
16117 if (!rtx_equal_p (operands[0], operands[1]))
16118 emit_move_insn (operands[0], operands[1]);
16119 emit_insn ((mode == DImode
16121 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16122 ix86_expand_ashl_const (low[0], count, mode);
16127 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16129 if (operands[1] == const1_rtx)
16131 /* Assuming we've chosen a QImode capable registers, then 1 << N
16132 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16133 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16135 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16137 ix86_expand_clear (low[0]);
16138 ix86_expand_clear (high[0]);
16139 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16141 d = gen_lowpart (QImode, low[0]);
16142 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16143 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16144 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16146 d = gen_lowpart (QImode, high[0]);
16147 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16148 s = gen_rtx_NE (QImode, flags, const0_rtx);
16149 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16152 /* Otherwise, we can get the same results by manually performing
16153 a bit extract operation on bit 5/6, and then performing the two
16154 shifts. The two methods of getting 0/1 into low/high are exactly
16155 the same size. Avoiding the shift in the bit extract case helps
16156 pentium4 a bit; no one else seems to care much either way. */
16161 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16162 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16164 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16165 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16167 emit_insn ((mode == DImode
16169 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16170 emit_insn ((mode == DImode
16172 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16173 emit_move_insn (low[0], high[0]);
16174 emit_insn ((mode == DImode
16176 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16179 emit_insn ((mode == DImode
16181 : gen_ashldi3) (low[0], low[0], operands[2]));
16182 emit_insn ((mode == DImode
16184 : gen_ashldi3) (high[0], high[0], operands[2]));
16188 if (operands[1] == constm1_rtx)
16190 /* For -1 << N, we can avoid the shld instruction, because we
16191 know that we're shifting 0...31/63 ones into a -1. */
16192 emit_move_insn (low[0], constm1_rtx)
16193 if (optimize_insn_for_size_p ())
16194 emit_move_insn (high[0], low[0]);
16196 emit_move_insn (high[0], constm1_rtx);
16200 if (!rtx_equal_p (operands[0], operands[1]))
16201 emit_move_insn (operands[0], operands[1]);
16203 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16204 emit_insn ((mode == DImode
16206 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16209 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the case where count >= word size at runtime.  */
16211 if (TARGET_CMOVE && scratch)
16213 ix86_expand_clear (scratch);
16214 emit_insn ((mode == DImode
16215 ? gen_x86_shift_adj_1
16216 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16220 emit_insn ((mode == DImode
16221 ? gen_x86_shift_adj_2
16222 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into word-size operations.
   NOTE(review): elided listing; comments describe visible statements.  */
16226 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16228 rtx low[2], high[2];
16230 const int single_width = mode == DImode ? 32 : 64;
16232 if (CONST_INT_P (operands[2]))
16234 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16235 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Max shift: both words become the sign replication.  */
16237 if (count == single_width * 2 - 1)
16239 emit_move_insn (high[0], high[1]);
16240 emit_insn ((mode == DImode
16242 : gen_ashrdi3) (high[0], high[0],
16243 GEN_INT (single_width - 1)));
16244 emit_move_insn (low[0], high[0]);
/* Shift by >= one word: low gets old high, high gets sign bits.  */
16247 else if (count >= single_width)
16249 emit_move_insn (low[0], high[1]);
16250 emit_move_insn (high[0], low[0]);
16251 emit_insn ((mode == DImode
16253 : gen_ashrdi3) (high[0], high[0],
16254 GEN_INT (single_width - 1)));
16255 if (count > single_width)
16256 emit_insn ((mode == DImode
16258 : gen_ashrdi3) (low[0], low[0],
16259 GEN_INT (count - single_width)));
/* Shift by < one word: shrd plus arithmetic shift of high word.  */
16263 if (!rtx_equal_p (operands[0], operands[1]))
16264 emit_move_insn (operands[0], operands[1]);
16265 emit_insn ((mode == DImode
16267 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16268 emit_insn ((mode == DImode
16270 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
16275 if (!rtx_equal_p (operands[0], operands[1]))
16276 emit_move_insn (operands[0], operands[1]);
16278 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16280 emit_insn ((mode == DImode
16282 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16283 emit_insn ((mode == DImode
16285 : gen_ashrdi3) (high[0], high[0], operands[2]));
16287 if (TARGET_CMOVE && scratch)
16289 emit_move_insn (scratch, high[0]);
16290 emit_insn ((mode == DImode
16292 : gen_ashrdi3) (scratch, scratch,
16293 GEN_INT (single_width - 1)));
16294 emit_insn ((mode == DImode
16295 ? gen_x86_shift_adj_1
16296 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16300 emit_insn ((mode == DImode
16301 ? gen_x86_shift_adj_3
16302 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-size operations.
   NOTE(review): elided listing; comments describe visible statements.  */
16307 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16309 rtx low[2], high[2];
16311 const int single_width = mode == DImode ? 32 : 64;
16313 if (CONST_INT_P (operands[2]))
16315 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16316 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by >= one word: high word becomes zero.  */
16318 if (count >= single_width)
16320 emit_move_insn (low[0], high[1]);
16321 ix86_expand_clear (high[0]);
16323 if (count > single_width)
16324 emit_insn ((mode == DImode
16326 : gen_lshrdi3) (low[0], low[0],
16327 GEN_INT (count - single_width)));
/* Shift by < one word: shrd plus logical shift of high word.  */
16331 if (!rtx_equal_p (operands[0], operands[1]))
16332 emit_move_insn (operands[0], operands[1]);
16333 emit_insn ((mode == DImode
16335 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16336 emit_insn ((mode == DImode
16338 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
16343 if (!rtx_equal_p (operands[0], operands[1]))
16344 emit_move_insn (operands[0], operands[1]);
16346 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16348 emit_insn ((mode == DImode
16350 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16351 emit_insn ((mode == DImode
16353 : gen_lshrdi3) (high[0], high[0], operands[2]));
16355 /* Heh. By reversing the arguments, we can reuse this pattern. */
16356 if (TARGET_CMOVE && scratch)
16358 ix86_expand_clear (scratch);
16359 emit_insn ((mode == DImode
16360 ? gen_x86_shift_adj_1
16361 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16365 emit_insn ((mode == DImode
16366 ? gen_x86_shift_adj_2
16367 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16371 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is on the REG_BR_PROB_BASE scale; attaches a REG_BR_PROB note.  */
16373 predict_jump (int prob)
16375 rtx insn = get_last_insn ();
16376 gcc_assert (JUMP_P (insn));
16378 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16383 /* Helper function for the string operations below. Dest VARIABLE whether
16384 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label (NOTE(review): return line elided).  EPILOGUE picks
   the predicted branch probability (50% vs 90%).  */
16386 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16388 rtx label = gen_label_rtx ();
16389 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16390 if (GET_MODE (variable) == DImode)
16391 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16393 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16394 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16397 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16399 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16403 /* Adjust COUNTER by the VALUE. */
/* Emits COUNTREG -= VALUE in the register's own mode.  */
16405 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16407 if (GET_MODE (countreg) == DImode)
16408 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16410 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16413 /* Zero extend possibly SImode EXP to Pmode register. */
/* Constants (VOIDmode) are forced into a Pmode register; Pmode values
   are copied; otherwise zero-extend SImode -> DImode.  */
16415 ix86_zero_extend_to_Pmode (rtx exp)
16418 if (GET_MODE (exp) == VOIDmode)
16419 return force_reg (Pmode, exp);
16420 if (GET_MODE (exp) == Pmode)
16421 return copy_to_mode_reg (Pmode, exp);
16422 r = gen_reg_rtx (Pmode);
16423 emit_insn (gen_zero_extendsidi2 (r, exp));
16427 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below); constants are folded
   directly, registers get a logical right shift.  */
16429 scale_counter (rtx countreg, int scale)
16432 rtx piece_size_mask;
16436 if (CONST_INT_P (countreg))
16437 return GEN_INT (INTVAL (countreg) / scale);
16438 gcc_assert (REG_P (countreg));
16440 piece_size_mask = GEN_INT (scale - 1);
16441 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16442 GEN_INT (exact_log2 (scale)),
16443 NULL, 1, OPTAB_DIRECT);
16447 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16448 DImode for constant loop counts. */
16450 static enum machine_mode
16451 counter_mode (rtx count_exp)
16453 if (GET_MODE (count_exp) != VOIDmode)
16454 return GET_MODE (count_exp);
16455 if (GET_CODE (count_exp) != CONST_INT)
/* Constant fits in 32 bits?  SImode suffices; else DImode (elided).  */
16457 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16462 /* When SRCPTR is non-NULL, output simple loop to move memory
16463 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16464 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16465 equivalent loop to set memory by VALUE (supposed to be in MODE).
16467 The size is rounded down to whole number of chunk size moved at once.
16468 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): elided listing -- loop structure lines are missing;
   comments describe visible statements only.  */
16472 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16473 rtx destptr, rtx srcptr, rtx value,
16474 rtx count, enum machine_mode mode, int unroll,
16477 rtx out_label, top_label, iter, tmp;
16478 enum machine_mode iter_mode = counter_mode (count);
16479 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16480 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16486 top_label = gen_label_rtx ();
16487 out_label = gen_label_rtx ();
16488 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk.  */
16490 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16491 NULL, 1, OPTAB_DIRECT);
16492 /* Those two should combine. */
16493 if (piece_size == const1_rtx)
16495 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16497 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16499 emit_move_insn (iter, const0_rtx);
16501 emit_label (top_label);
16503 tmp = convert_modes (Pmode, iter_mode, iter, true);
16504 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16505 destmem = change_address (destmem, mode, x_addr);
16509 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16510 srcmem = change_address (srcmem, mode, y_addr);
16512 /* When unrolling for chips that reorder memory reads and writes,
16513 we can save registers by using single temporary.
16514 Also using 4 temporaries is overkill in 32bit mode. */
16515 if (!TARGET_64BIT && 0)
16517 for (i = 0; i < unroll; i++)
16522 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16524 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16526 emit_move_insn (destmem, srcmem);
/* Default copy path: load all chunks into temporaries, then store.  */
16532 gcc_assert (unroll <= 4);
16533 for (i = 0; i < unroll; i++)
16535 tmpreg[i] = gen_reg_rtx (mode);
16539 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16541 emit_move_insn (tmpreg[i], srcmem);
16543 for (i = 0; i < unroll; i++)
16548 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16550 emit_move_insn (destmem, tmpreg[i]);
/* Memset path: store VALUE into each chunk.  */
16555 for (i = 0; i < unroll; i++)
16559 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16560 emit_move_insn (destmem, value);
16563 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16564 true, OPTAB_LIB_WIDEN);
16566 emit_move_insn (iter, tmp);
16568 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Tune the back-edge probability from the expected trip count.  */
16570 if (expected_size != -1)
16572 expected_size /= GET_MODE_SIZE (mode) * unroll;
16573 if (expected_size == 0)
16575 else if (expected_size > REG_BR_PROB_BASE)
16576 predict_jump (REG_BR_PROB_BASE - 1);
16578 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16581 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region.  */
16582 iter = ix86_zero_extend_to_Pmode (iter);
16583 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16584 true, OPTAB_LIB_WIDEN);
16585 if (tmp != destptr)
16586 emit_move_insn (destptr, tmp);
16589 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16590 true, OPTAB_LIB_WIDEN);
16592 emit_move_insn (srcptr, tmp);
16594 emit_label (out_label);
16597 /* Output "rep; mov" instruction.
16598 Arguments have same meaning as for previous function */
/* Emits a single "rep movs" of the given element MODE.  The count
   register holds COUNT scaled down by the element size; DESTEXP/SRCEXP
   describe the final pointer values for the rep pattern.
   NOTE(review): excerpt is missing intermediate source lines.  */
16600 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16601 rtx destptr, rtx srcptr,
16603 enum machine_mode mode)
16609 /* If the size is known, it is shorter to use rep movs. */
16610 if (mode == QImode && CONST_INT_P (count)
16611 && !(INTVAL (count) & 3))
/* Force the MEMs to BLKmode based at the pointer registers so aliasing
   info stays conservative for the string instruction.  */
16614 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16615 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16616 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16617 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16618 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16619 if (mode != QImode)
/* Final pointer = base + (count << log2(element size)).  */
16621 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16622 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16623 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16624 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16625 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16626 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
/* QImode: no scaling needed.  */
16630 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16631 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16633 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16637 /* Output "rep; stos" instruction.
16638 Arguments have same meaning as for previous function */
/* Emits a single "rep stos" storing VALUE (narrowed to MODE) COUNT/size
   times.  Mirrors expand_movmem_via_rep_mov above but with no source.
   NOTE(review): excerpt is missing intermediate source lines.  */
16640 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16642 enum machine_mode mode)
16647 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16648 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* rep stos requires the value in a register (eax).  */
16649 value = force_reg (mode, gen_lowpart (mode, value));
16650 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16651 if (mode != QImode)
/* Final destination pointer = base + (count << log2(element size)).  */
16653 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16654 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16655 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16658 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16659 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move of MODE-sized data from SRCMEM+OFFSET to
   DESTMEM+OFFSET, auto-incrementing DESTPTR and SRCPTR via gen_strmov.
   NOTE(review): the "static void" line and braces are missing from this
   excerpt.  */
16663 emit_strmov (rtx destmem, rtx srcmem,
16664 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16666 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16667 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16668 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16671 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Two strategies: for a compile-time COUNT, emit straight-line moves for
   each set bit of the residue; otherwise emit an aligntest jump tree.
   NOTE(review): excerpt is missing intermediate source lines.  */
16673 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16674 rtx destptr, rtx srcptr, rtx count, int max_size)
16677 if (CONST_INT_P (count))
16679 HOST_WIDE_INT countval = INTVAL (count);
/* Handle the 16-, 8-, 4-, 2- and 1-byte residues bit by bit.  */
16682 if ((countval & 0x10) && max_size > 16)
16686 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16687 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16690 gcc_unreachable ();
16693 if ((countval & 0x08) && max_size > 8)
16696 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit targets split the 8-byte residue into two SImode moves.  */
16699 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16700 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16704 if ((countval & 0x04) && max_size > 4)
16706 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16709 if ((countval & 0x02) && max_size > 2)
16711 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16714 if ((countval & 0x01) && max_size > 1)
16716 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large residues: fall back to a byte loop over count & (max_size-1).  */
16723 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16724 count, 1, OPTAB_DIRECT);
16725 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16726 count, QImode, 1, 4);
16730 /* When there are stringops, we can cheaply increase dest and src pointers.
16731 Otherwise we save code size by maintaining offset (zero is readily
16732 available from preceding rep operation) and using x86 addressing modes.
16734 if (TARGET_SINGLE_STRINGOP)
/* Jump-tree variant using movs instructions (pointers auto-advance).  */
16738 rtx label = ix86_expand_aligntest (count, 4, true);
16739 src = change_address (srcmem, SImode, srcptr);
16740 dest = change_address (destmem, SImode, destptr);
16741 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16742 emit_label (label);
16743 LABEL_NUSES (label) = 1;
16747 rtx label = ix86_expand_aligntest (count, 2, true);
16748 src = change_address (srcmem, HImode, srcptr);
16749 dest = change_address (destmem, HImode, destptr);
16750 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16751 emit_label (label);
16752 LABEL_NUSES (label) = 1;
16756 rtx label = ix86_expand_aligntest (count, 1, true);
16757 src = change_address (srcmem, QImode, srcptr);
16758 dest = change_address (destmem, QImode, destptr);
16759 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16760 emit_label (label);
16761 LABEL_NUSES (label) = 1;
/* Jump-tree variant keeping a separate OFFSET register.  */
16766 rtx offset = force_reg (Pmode, const0_rtx);
16771 rtx label = ix86_expand_aligntest (count, 4, true);
16772 src = change_address (srcmem, SImode, srcptr);
16773 dest = change_address (destmem, SImode, destptr);
16774 emit_move_insn (dest, src);
16775 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16776 true, OPTAB_LIB_WIDEN);
16778 emit_move_insn (offset, tmp);
16779 emit_label (label);
16780 LABEL_NUSES (label) = 1;
16784 rtx label = ix86_expand_aligntest (count, 2, true);
16785 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16786 src = change_address (srcmem, HImode, tmp);
16787 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16788 dest = change_address (destmem, HImode, tmp);
16789 emit_move_insn (dest, src);
16790 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16791 true, OPTAB_LIB_WIDEN);
16793 emit_move_insn (offset, tmp);
16794 emit_label (label);
16795 LABEL_NUSES (label) = 1;
16799 rtx label = ix86_expand_aligntest (count, 1, true);
16800 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16801 src = change_address (srcmem, QImode, tmp);
16802 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16803 dest = change_address (destmem, QImode, tmp);
16804 emit_move_insn (dest, src);
16805 emit_label (label);
16806 LABEL_NUSES (label) = 1;
16811 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-loop fallback for the memset epilogue: masks COUNT down to the
   residue and delegates to the generic loop expander in QImode.
   NOTE(review): excerpt is missing intermediate source lines.  */
16813 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16814 rtx count, int max_size)
16817 expand_simple_binop (counter_mode (count), AND, count,
16818 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16819 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16820 gen_lowpart (QImode, value), count, QImode,
16824 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* memset analogue of expand_movmem_epilogue: straight-line stores for a
   known COUNT, otherwise an aligntest jump tree of strset insns.  VALUE
   is expected to be promoted (byte replicated) by the caller.
   NOTE(review): excerpt is missing intermediate source lines.  */
16826 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16830 if (CONST_INT_P (count))
16832 HOST_WIDE_INT countval = INTVAL (count);
16835 if ((countval & 0x10) && max_size > 16)
16839 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16840 emit_insn (gen_strset (destptr, dest, value));
16841 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16842 emit_insn (gen_strset (destptr, dest, value));
16845 gcc_unreachable ();
16848 if ((countval & 0x08) && max_size > 8)
16852 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16853 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit targets: two SImode stores for the 8-byte residue.  */
16857 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16858 emit_insn (gen_strset (destptr, dest, value));
16859 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16860 emit_insn (gen_strset (destptr, dest, value));
16864 if ((countval & 0x04) && max_size > 4)
16866 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16867 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16870 if ((countval & 0x02) && max_size > 2)
16872 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16873 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16876 if ((countval & 0x01) && max_size > 1)
16878 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16879 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16886 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable COUNT: aligntest jump tree, widest residue first.  */
16891 rtx label = ix86_expand_aligntest (count, 16, true);
16894 dest = change_address (destmem, DImode, destptr);
16895 emit_insn (gen_strset (destptr, dest, value));
16896 emit_insn (gen_strset (destptr, dest, value));
16900 dest = change_address (destmem, SImode, destptr);
16901 emit_insn (gen_strset (destptr, dest, value));
16902 emit_insn (gen_strset (destptr, dest, value));
16903 emit_insn (gen_strset (destptr, dest, value));
16904 emit_insn (gen_strset (destptr, dest, value));
16906 emit_label (label);
16907 LABEL_NUSES (label) = 1;
16911 rtx label = ix86_expand_aligntest (count, 8, true);
16914 dest = change_address (destmem, DImode, destptr);
16915 emit_insn (gen_strset (destptr, dest, value));
16919 dest = change_address (destmem, SImode, destptr);
16920 emit_insn (gen_strset (destptr, dest, value));
16921 emit_insn (gen_strset (destptr, dest, value));
16923 emit_label (label);
16924 LABEL_NUSES (label) = 1;
16928 rtx label = ix86_expand_aligntest (count, 4, true);
16929 dest = change_address (destmem, SImode, destptr);
16930 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16931 emit_label (label);
16932 LABEL_NUSES (label) = 1;
16936 rtx label = ix86_expand_aligntest (count, 2, true);
16937 dest = change_address (destmem, HImode, destptr);
16938 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16939 emit_label (label);
16940 LABEL_NUSES (label) = 1;
16944 rtx label = ix86_expand_aligntest (count, 1, true);
16945 dest = change_address (destmem, QImode, destptr);
16946 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16947 emit_label (label);
16948 LABEL_NUSES (label) = 1;
16952 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
16953 DESIRED_ALIGNMENT. */
/* For each alignment step (1->2, 2->4, 4->8) conditionally copy one
   element and decrement COUNT, so the main loop starts aligned.
   NOTE(review): excerpt is missing intermediate source lines.  */
16955 expand_movmem_prologue (rtx destmem, rtx srcmem,
16956 rtx destptr, rtx srcptr, rtx count,
16957 int align, int desired_alignment)
16959 if (align <= 1 && desired_alignment > 1)
16961 rtx label = ix86_expand_aligntest (destptr, 1, false);
16962 srcmem = change_address (srcmem, QImode, srcptr);
16963 destmem = change_address (destmem, QImode, destptr);
16964 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16965 ix86_adjust_counter (count, 1);
16966 emit_label (label);
16967 LABEL_NUSES (label) = 1;
16969 if (align <= 2 && desired_alignment > 2)
16971 rtx label = ix86_expand_aligntest (destptr, 2, false);
16972 srcmem = change_address (srcmem, HImode, srcptr);
16973 destmem = change_address (destmem, HImode, destptr);
16974 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16975 ix86_adjust_counter (count, 2);
16976 emit_label (label);
16977 LABEL_NUSES (label) = 1;
16979 if (align <= 4 && desired_alignment > 4)
16981 rtx label = ix86_expand_aligntest (destptr, 4, false);
16982 srcmem = change_address (srcmem, SImode, srcptr);
16983 destmem = change_address (destmem, SImode, destptr);
16984 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16985 ix86_adjust_counter (count, 4);
16986 emit_label (label);
16987 LABEL_NUSES (label) = 1;
/* Only alignment up to 8 bytes is handled by this prologue.  */
16989 gcc_assert (desired_alignment <= 8);
16992 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
16993 DESIRED_ALIGNMENT. */
/* memset analogue of expand_movmem_prologue: store a 1/2/4-byte slice of
   VALUE per alignment step, adjusting COUNT accordingly.
   NOTE(review): excerpt is missing intermediate source lines.  */
16995 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16996 int align, int desired_alignment)
16998 if (align <= 1 && desired_alignment > 1)
17000 rtx label = ix86_expand_aligntest (destptr, 1, false);
17001 destmem = change_address (destmem, QImode, destptr);
17002 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17003 ix86_adjust_counter (count, 1);
17004 emit_label (label);
17005 LABEL_NUSES (label) = 1;
17007 if (align <= 2 && desired_alignment > 2)
17009 rtx label = ix86_expand_aligntest (destptr, 2, false);
17010 destmem = change_address (destmem, HImode, destptr);
17011 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17012 ix86_adjust_counter (count, 2);
17013 emit_label (label);
17014 LABEL_NUSES (label) = 1;
17016 if (align <= 4 && desired_alignment > 4)
17018 rtx label = ix86_expand_aligntest (destptr, 4, false);
17019 destmem = change_address (destmem, SImode, destptr);
17020 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17021 ix86_adjust_counter (count, 4);
17022 emit_label (label);
17023 LABEL_NUSES (label) = 1;
17025 gcc_assert (desired_alignment <= 8);
17028 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17029 static enum stringop_alg
17030 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17031 int *dynamic_check)
17033 const struct stringop_algs * algs;
17034 bool optimize_for_speed;
17035 /* Algorithms using the rep prefix want at least edi and ecx;
17036 additionally, memset wants eax and memcpy wants esi. Don't
17037 consider such algorithms if the user has appropriated those
17038 registers for their own purposes. */
17039 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17041 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17043 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17044 || (alg != rep_prefix_1_byte \
17045 && alg != rep_prefix_4_byte \
17046 && alg != rep_prefix_8_byte))
17047 const struct processor_costs *cost;
17049 /* Even if the string operation call is cold, we still might spend a lot
17050 of time processing large blocks. */
17051 if (optimize_function_for_size_p (cfun)
17052 || (optimize_insn_for_size_p ()
17053 && expected_size != -1 && expected_size < 256))
17054 optimize_for_speed = false;
17056 optimize_for_speed = true;
17058 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17060 *dynamic_check = -1;
17062 algs = &cost->memset[TARGET_64BIT != 0];
17064 algs = &cost->memcpy[TARGET_64BIT != 0];
17065 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17066 return stringop_alg;
17067 /* rep; movq or rep; movl is the smallest variant. */
17068 else if (!optimize_for_speed)
17070 if (!count || (count & 3))
17071 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17073 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17075 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17077 else if (expected_size != -1 && expected_size < 4)
17078 return loop_1_byte;
17079 else if (expected_size != -1)
17082 enum stringop_alg alg = libcall;
17083 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17085 /* We get here if the algorithms that were not libcall-based
17086 were rep-prefix based and we are unable to use rep prefixes
17087 based on global register usage. Break out of the loop and
17088 use the heuristic below. */
17089 if (algs->size[i].max == 0)
17091 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17093 enum stringop_alg candidate = algs->size[i].alg;
17095 if (candidate != libcall && ALG_USABLE_P (candidate))
17097 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17098 last non-libcall inline algorithm. */
17099 if (TARGET_INLINE_ALL_STRINGOPS)
17101 /* When the current size is best to be copied by a libcall,
17102 but we are still forced to inline, run the heuristic below
17103 that will pick code for medium sized blocks. */
17104 if (alg != libcall)
17108 else if (ALG_USABLE_P (candidate))
17112 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17114 /* When asked to inline the call anyway, try to pick meaningful choice.
17115 We look for maximal size of block that is faster to copy by hand and
17116 take blocks of at most of that size guessing that average size will
17117 be roughly half of the block.
17119 If this turns out to be bad, we might simply specify the preferred
17120 choice in ix86_costs. */
17121 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17122 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17125 enum stringop_alg alg;
17127 bool any_alg_usable_p = true;
17129 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17131 enum stringop_alg candidate = algs->size[i].alg;
17132 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17134 if (candidate != libcall && candidate
17135 && ALG_USABLE_P (candidate))
17136 max = algs->size[i].max;
17138 /* If there aren't any usable algorithms, then recursing on
17139 smaller sizes isn't going to find anything. Just return the
17140 simple byte-at-a-time copy loop. */
17141 if (!any_alg_usable_p)
17143 /* Pick something reasonable. */
17144 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17145 *dynamic_check = 128;
17146 return loop_1_byte;
17150 alg = decide_alg (count, max / 2, memset, dynamic_check);
17151 gcc_assert (*dynamic_check == -1);
17152 gcc_assert (alg != libcall);
17153 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17154 *dynamic_check = max;
17157 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17158 #undef ALG_USABLE_P
17161 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17162 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the alignment the prologue should establish for ALG, never less
   than ALIGN; tiny expected sizes keep the existing alignment.
   NOTE(review): excerpt is missing intermediate source lines (switch
   cases and braces).  */
17164 decide_alignment (int align,
17165 enum stringop_alg alg,
17168 int desired_align = 0;
17172 gcc_unreachable ();
17174 case unrolled_loop:
17175 desired_align = GET_MODE_SIZE (Pmode);
17177 case rep_prefix_8_byte:
17180 case rep_prefix_4_byte:
17181 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17182 copying whole cacheline at once. */
17183 if (TARGET_PENTIUMPRO)
17188 case rep_prefix_1_byte:
17189 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17190 copying whole cacheline at once. */
17191 if (TARGET_PENTIUMPRO)
/* Never request less than the known alignment; for very small expected
   sizes the prologue cost is not worth it.  */
17205 if (desired_align < align)
17206 desired_align = align;
17207 if (expected_size != -1 && expected_size < 4)
17208 desired_align = align;
17209 return desired_align;
/* Return the smallest power of 2 greater than VAL.
   E.g. 0 -> 1, 1 -> 2, 7 -> 8, 8 -> 16.  VAL is assumed small enough
   (an epilogue size) that the shift cannot overflow.
   NOTE(review): the body was absent from this excerpt; restored per the
   documented contract.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
17222 /* Expand string move (memcpy) operation. Use i386 string operations when
17223 profitable. expand_setmem contains similar code. The code depends upon
17224 architecture, block size and alignment, but always has the same
17227 1) Prologue guard: Conditional that jumps up to epilogues for small
17228 blocks that can be handled by epilogue alone. This is faster but
17229 also needed for correctness, since prologue assume the block is larger
17230 than the desired alignment.
17232 Optional dynamic check for size and libcall for large
17233 blocks is emitted here too, with -minline-stringops-dynamically.
17235 2) Prologue: copy first few bytes in order to get destination aligned
17236 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17237 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17238 We emit either a jump tree on power of two sized blocks, or a byte loop.
17240 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17241 with specified algorithm.
17243 4) Epilogue: code copying tail of the block that is too small to be
17244 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): this excerpt is missing intermediate source lines
   (declarations, braces, switch labels); the stray leading integers are
   original-file line numbers.  */
17247 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17248 rtx expected_align_exp, rtx expected_size_exp)
17254 rtx jump_around_label = NULL;
17255 HOST_WIDE_INT align = 1;
17256 unsigned HOST_WIDE_INT count = 0;
17257 HOST_WIDE_INT expected_size = -1;
17258 int size_needed = 0, epilogue_size_needed;
17259 int desired_align = 0;
17260 enum stringop_alg alg;
17262 bool need_zero_guard = false;
17264 if (CONST_INT_P (align_exp))
17265 align = INTVAL (align_exp);
17266 /* i386 can do misaligned access on reasonably increased cost. */
17267 if (CONST_INT_P (expected_align_exp)
17268 && INTVAL (expected_align_exp) > align)
17269 align = INTVAL (expected_align_exp);
/* A compile-time count takes precedence over profile-fed expectation.  */
17270 if (CONST_INT_P (count_exp))
17271 count = expected_size = INTVAL (count_exp);
17272 if (CONST_INT_P (expected_size_exp) && count == 0)
17273 expected_size = INTVAL (expected_size_exp);
17275 /* Make sure we don't need to care about overflow later on. */
17276 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17279 /* Step 0: Decide on preferred algorithm, desired alignment and
17280 size of chunks to be copied by main loop. */
17282 alg = decide_alg (count, expected_size, false, &dynamic_check);
17283 desired_align = decide_alignment (align, alg, expected_size);
17285 if (!TARGET_ALIGN_STRINGOPS)
17286 align = desired_align;
17288 if (alg == libcall)
17290 gcc_assert (alg != no_stringop);
17292 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17293 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17294 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size and whether a zero-count guard is needed.  */
17299 gcc_unreachable ();
17301 need_zero_guard = true;
17302 size_needed = GET_MODE_SIZE (Pmode);
17304 case unrolled_loop:
17305 need_zero_guard = true;
17306 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17308 case rep_prefix_8_byte:
17311 case rep_prefix_4_byte:
17314 case rep_prefix_1_byte:
17318 need_zero_guard = true;
17323 epilogue_size_needed = size_needed;
17325 /* Step 1: Prologue guard. */
17327 /* Alignment code needs count to be in register. */
17328 if (CONST_INT_P (count_exp) && desired_align > align)
17329 count_exp = force_reg (counter_mode (count_exp), count_exp);
17330 gcc_assert (desired_align >= 1 && align >= 1);
17332 /* Ensure that alignment prologue won't copy past end of block. */
17333 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17335 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17336 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17337 Make sure it is power of 2. */
17338 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17340 if (CONST_INT_P (count_exp))
17342 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Variable count: branch to the epilogue for blocks smaller than the
   epilogue can fully handle.  */
17347 label = gen_label_rtx ();
17348 emit_cmp_and_jump_insns (count_exp,
17349 GEN_INT (epilogue_size_needed),
17350 LTU, 0, counter_mode (count_exp), 1, label);
17351 if (expected_size == -1 || expected_size < epilogue_size_needed)
17352 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17354 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17358 /* Emit code to decide on runtime whether library call or inline should be
17360 if (dynamic_check != -1)
17362 if (CONST_INT_P (count_exp))
17364 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17366 emit_block_move_via_libcall (dst, src, count_exp, false);
17367 count_exp = const0_rtx;
/* Runtime size check: large blocks go through the libcall path.  */
17373 rtx hot_label = gen_label_rtx ();
17374 jump_around_label = gen_label_rtx ();
17375 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17376 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17377 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17378 emit_block_move_via_libcall (dst, src, count_exp, false);
17379 emit_jump (jump_around_label);
17380 emit_label (hot_label);
17384 /* Step 2: Alignment prologue. */
17386 if (desired_align > align)
17388 /* Except for the first move in epilogue, we no longer know
17389 constant offset in aliasing info. It don't seems to worth
17390 the pain to maintain it for the first move, so throw away
17392 src = change_address (src, BLKmode, srcreg);
17393 dst = change_address (dst, BLKmode, destreg);
17394 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17396 if (need_zero_guard && !count)
17398 /* It is possible that we copied enough so the main loop will not
17400 emit_cmp_and_jump_insns (count_exp,
17401 GEN_INT (size_needed),
17402 LTU, 0, counter_mode (count_exp), 1, label)
17403 if (expected_size == -1
17404 || expected_size < (desired_align - align) / 2 + size_needed)
17405 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17407 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17410 if (label && size_needed == 1)
17412 emit_label (label);
17413 LABEL_NUSES (label) = 1;
17417 /* Step 3: Main loop. */
/* Dispatch to the chosen algorithm's expander.  */
17423 gcc_unreachable ();
17425 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17426 count_exp, QImode, 1, expected_size);
17429 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17430 count_exp, Pmode, 1, expected_size);
17432 case unrolled_loop:
17433 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17434 registers for 4 temporaries anyway. */
17435 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17436 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17439 case rep_prefix_8_byte:
17440 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17443 case rep_prefix_4_byte:
17444 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17447 case rep_prefix_1_byte:
17448 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17452 /* Adjust properly the offset of src and dest memory for aliasing. */
17453 if (CONST_INT_P (count_exp))
17455 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17456 (count / size_needed) * size_needed);
17457 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17458 (count / size_needed) * size_needed);
17462 src = change_address (src, BLKmode, srcreg);
17463 dst = change_address (dst, BLKmode, destreg);
17466 /* Step 4: Epilogue to copy the remaining bytes. */
17470 /* When the main loop is done, COUNT_EXP might hold original count,
17471 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17472 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17473 bytes. Compensate if needed. */
17475 if (size_needed < epilogue_size_needed)
17478 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17479 GEN_INT (size_needed - 1), count_exp, 1,
17481 if (tmp != count_exp)
17482 emit_move_insn (count_exp, tmp);
17484 emit_label (label);
17485 LABEL_NUSES (label) = 1;
17488 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17489 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17490 epilogue_size_needed);
17491 if (jump_around_label)
17492 emit_label (jump_around_label);
17496 /* Helper function for memcpy. For QImode value 0xXY produce
17497 0xXYXYXYXY of wide specified by MODE. This is essentially
17498 a * 0x10101010, but we can do slightly better than
17499 synth_mult by unwinding the sequence by hand on CPUs with
/* Replicates a byte VAL across all bytes of an SImode/DImode register,
   choosing between a multiply sequence and shift/or (or insv) based on
   processor costs.  NOTE(review): excerpt is missing intermediate source
   lines.  */
17502 promote_duplicated_reg (enum machine_mode mode, rtx val)
17504 enum machine_mode valmode = GET_MODE (val);
17506 int nops = mode == DImode ? 3 : 2;
17508 gcc_assert (mode == SImode || mode == DImode);
17509 if (val == const0_rtx)
17510 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: replicate at compile time.  */
17511 if (CONST_INT_P (val))
17513 HOST_WIDE_INT v = INTVAL (val) & 255;
17517 if (mode == DImode)
17518 v |= (v << 16) << 16;
17519 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17522 if (valmode == VOIDmode)
17524 if (valmode != QImode)
17525 val = gen_lowpart (QImode, val);
17526 if (mode == QImode)
17528 if (!TARGET_PARTIAL_REG_STALL)
/* Multiply by 0x01010101... when that is cheaper than the shift/or
   sequence on this processor.  */
17530 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17531 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17532 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17533 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17535 rtx reg = convert_modes (mode, QImode, val, true);
17536 tmp = promote_duplicated_reg (mode, const1_rtx);
17537 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Otherwise build the value by doubling-width shift+or steps.  */
17542 rtx reg = convert_modes (mode, QImode, val, true);
17544 if (!TARGET_PARTIAL_REG_STALL)
17545 if (mode == SImode)
17546 emit_insn (gen_movsi_insv_1 (reg, reg));
17548 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17551 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17552 NULL, 1, OPTAB_DIRECT);
17554 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17556 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17557 NULL, 1, OPTAB_DIRECT);
17558 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): the guard below reads "mode == SImode" before a 32-bit
   shift; presumably the missing lines make this the DImode widening
   step -- confirm against the full source.  */
17559 if (mode == SImode)
17561 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17562 NULL, 1, OPTAB_DIRECT);
17563 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17568 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17569 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17570 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest replication mode (DI/SI/HI) actually required; returns
   VAL unchanged when one byte suffices.  NOTE(review): excerpt is missing
   intermediate source lines (the TARGET_64BIT guard of the first test).  */
17572 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
17577 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17578 promoted_val = promote_duplicated_reg (DImode, val);
17579 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17580 promoted_val = promote_duplicated_reg (SImode, val);
17581 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17582 promoted_val = promote_duplicated_reg (HImode, val);
17584 promoted_val = val;
17586 return promoted_val;
17589 /* Expand string clear operation (bzero). Use i386 string operations when
17590 profitable. See expand_movmem comment for explanation of individual
17591 steps performed. */
/* NOTE(review): this excerpt has interior lines elided; comments below
   annotate only what is visible.  */
17593 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17594 rtx expected_align_exp, rtx expected_size_exp)
17599 rtx jump_around_label = NULL;
17600 HOST_WIDE_INT align = 1;
17601 unsigned HOST_WIDE_INT count = 0;
17602 HOST_WIDE_INT expected_size = -1;
17603 int size_needed = 0, epilogue_size_needed;
17604 int desired_align = 0;
17605 enum stringop_alg alg;
17606 rtx promoted_val = NULL;
17607 bool force_loopy_epilogue = false;
17609 bool need_zero_guard = false;
/* Harvest compile-time alignment/count information; it drives the
   algorithm choice in step 0.  */
17611 if (CONST_INT_P (align_exp))
17612 align = INTVAL (align_exp);
17613 /* i386 can do misaligned access on reasonably increased cost. */
17614 if (CONST_INT_P (expected_align_exp)
17615 && INTVAL (expected_align_exp) > align)
17616 align = INTVAL (expected_align_exp);
17617 if (CONST_INT_P (count_exp))
17618 count = expected_size = INTVAL (count_exp);
17619 if (CONST_INT_P (expected_size_exp) && count == 0)
17620 expected_size = INTVAL (expected_size_exp);
17622 /* Make sure we don't need to care about overflow later on. */
17623 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17626 /* Step 0: Decide on preferred algorithm, desired alignment and
17627 size of chunks to be copied by main loop. */
17629 alg = decide_alg (count, expected_size, true, &dynamic_check);
17630 desired_align = decide_alignment (align, alg, expected_size);
17632 if (!TARGET_ALIGN_STRINGOPS)
17633 align = desired_align;
17635 if (alg == libcall)
17637 gcc_assert (alg != no_stringop);
17639 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17640 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* size_needed is the number of bytes stored per main-loop iteration;
   a zero guard is required when the loop could run zero times.  */
17645 gcc_unreachable ();
17647 need_zero_guard = true;
17648 size_needed = GET_MODE_SIZE (Pmode);
17650 case unrolled_loop:
17651 need_zero_guard = true;
17652 size_needed = GET_MODE_SIZE (Pmode) * 4;
17654 case rep_prefix_8_byte:
17657 case rep_prefix_4_byte:
17660 case rep_prefix_1_byte:
17664 need_zero_guard = true;
17668 epilogue_size_needed = size_needed;
17670 /* Step 1: Prologue guard. */
17672 /* Alignment code needs count to be in register. */
17673 if (CONST_INT_P (count_exp) && desired_align > align)
17675 enum machine_mode mode = SImode;
/* Use a wider mode when the constant count does not fit in 32 bits.  */
17676 if (TARGET_64BIT && (count & ~0xffffffff))
17678 count_exp = force_reg (mode, count_exp);
17680 /* Do the cheap promotion to allow better CSE across the
17681 main loop and epilogue (ie one load of the big constant in the
17682 front of all code. */
17683 if (CONST_INT_P (val_exp))
17684 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17685 desired_align, align);
17686 /* Ensure that alignment prologue won't copy past end of block. */
17687 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17689 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17690 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17691 Make sure it is power of 2. */
17692 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17694 /* To improve performance of small blocks, we jump around the VAL
17695 promoting mode. This mean that if the promoted VAL is not constant,
17696 we might not use it in the epilogue and have to use byte
17698 if (epilogue_size_needed > 2 && !promoted_val)
17699 force_loopy_epilogue = true;
17700 label = gen_label_rtx ();
17701 emit_cmp_and_jump_insns (count_exp,
17702 GEN_INT (epilogue_size_needed),
17703 LTU, 0, counter_mode (count_exp), 1, label);
/* Attach branch-probability hints to the small-block bypass.  */
17704 if (GET_CODE (count_exp) == CONST_INT)
17706 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17707 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17709 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* A runtime size check diverts very large blocks to the library call.  */
17711 if (dynamic_check != -1)
17713 rtx hot_label = gen_label_rtx ();
17714 jump_around_label = gen_label_rtx ();
17715 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17716 LEU, 0, counter_mode (count_exp), 1, hot_label);
17717 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17718 set_storage_via_libcall (dst, count_exp, val_exp, false);
17719 emit_jump (jump_around_label);
17720 emit_label (hot_label);
17723 /* Step 2: Alignment prologue. */
17725 /* Do the expensive promotion once we branched off the small blocks. */
17727 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17728 desired_align, align);
17729 gcc_assert (desired_align >= 1 && align >= 1);
17731 if (desired_align > align)
17733 /* Except for the first move in epilogue, we no longer know
17734 constant offset in aliasing info. It don't seems to worth
17735 the pain to maintain it for the first move, so throw away
17737 dst = change_address (dst, BLKmode, destreg);
17738 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17740 if (need_zero_guard && !count)
17742 /* It is possible that we copied enough so the main loop will not
17744 emit_cmp_and_jump_insns (count_exp,
17745 GEN_INT (size_needed),
17746 LTU, 0, counter_mode (count_exp), 1, label);
17747 if (expected_size == -1
17748 || expected_size < (desired_align - align) / 2 + size_needed)
17749 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17751 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17754 if (label && size_needed == 1)
17756 emit_label (label);
17757 LABEL_NUSES (label) = 1;
17761 /* Step 3: Main loop. */
/* Dispatch on the algorithm chosen in step 0.  */
17767 gcc_unreachable ();
17769 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17770 count_exp, QImode, 1, expected_size);
17773 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17774 count_exp, Pmode, 1, expected_size);
17776 case unrolled_loop:
17777 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17778 count_exp, Pmode, 4, expected_size);
17780 case rep_prefix_8_byte:
17781 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17784 case rep_prefix_4_byte:
17785 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17788 case rep_prefix_1_byte:
17789 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17793 /* Adjust properly the offset of src and dest memory for aliasing. */
17794 if (CONST_INT_P (count_exp))
17795 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17796 (count / size_needed) * size_needed);
17798 dst = change_address (dst, BLKmode, destreg);
17800 /* Step 4: Epilogue to copy the remaining bytes. */
17804 /* When the main loop is done, COUNT_EXP might hold original count,
17805 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17806 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17807 bytes. Compensate if needed. */
17809 if (size_needed < desired_align - align)
17812 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17813 GEN_INT (size_needed - 1), count_exp, 1,
17815 size_needed = desired_align - align + 1;
17816 if (tmp != count_exp)
17817 emit_move_insn (count_exp, tmp);
17819 emit_label (label);
17820 LABEL_NUSES (label) = 1;
17822 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17824 if (force_loopy_epilogue)
17825 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17828 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17831 if (jump_around_label)
17832 emit_label (jump_around_label);
17836 /* Expand the appropriate insns for doing strlen if not just doing
17839 out = result, initialized with the start address
17840 align_rtx = alignment of the address.
17841 scratch = scratch register, initialized with the startaddress when
17842 not aligned, otherwise undefined
17844 This is just the body. It needs the initializations mentioned above and
17845 some address computing at the end. These things are done in i386.md. */
17848 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17852 rtx align_2_label = NULL_RTX;
17853 rtx align_3_label = NULL_RTX;
17854 rtx align_4_label = gen_label_rtx ();
17855 rtx end_0_label = gen_label_rtx ();
17857 rtx tmpreg = gen_reg_rtx (SImode);
17858 rtx scratch = gen_reg_rtx (SImode);
17862 if (CONST_INT_P (align_rtx))
17863 align = INTVAL (align_rtx);
17865 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17867 /* Is there a known alignment and is it less than 4? */
17870 rtx scratch1 = gen_reg_rtx (Pmode);
17871 emit_move_insn (scratch1, out);
17872 /* Is there a known alignment and is it not 2? */
17875 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17876 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17878 /* Leave just the 3 lower bits. */
17879 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17880 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already 4-byte aligned, 2 -> two
   bytes left to check, >2 -> one byte, else fall through to the
   byte-at-a-time check.  */
17882 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17883 Pmode, 1, align_4_label);
17884 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17885 Pmode, 1, align_2_label);
17886 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17887 Pmode, 1, align_3_label);
17891 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17892 check if is aligned to 4 - byte. */
17894 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17895 NULL_RTX, 0, OPTAB_WIDEN);
17897 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17898 Pmode, 1, align_4_label);
17901 mem = change_address (src, QImode, out);
17903 /* Now compare the bytes. */
17905 /* Compare the first n unaligned byte on a byte per byte basis. */
17906 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17907 QImode, 1, end_0_label);
17909 /* Increment the address. */
17910 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17912 /* Not needed with an alignment of 2 */
17915 emit_label (align_2_label);
17917 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17920 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17922 emit_label (align_3_label);
17925 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17928 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17931 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17932 align this loop. It gives only huge programs, but does not help to
17934 emit_label (align_4_label);
17936 mem = change_address (src, SImode, out);
17937 emit_move_insn (scratch, mem);
17938 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17940 /* This formula yields a nonzero result iff one of the bytes is zero.
17941 This saves three branches inside loop and many cycles. */
/* Classic "has-zero-byte" bit trick: (x - 0x01010101) & ~x & 0x80808080.  */
17943 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17944 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17945 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17946 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17947 gen_int_mode (0x80808080, SImode)));
17948 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found: narrow down which of the four bytes it is.
   This arm uses conditional moves to stay branch-free.  */
17953 rtx reg = gen_reg_rtx (SImode);
17954 rtx reg2 = gen_reg_rtx (Pmode);
17955 emit_move_insn (reg, tmpreg);
17956 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17958 /* If zero is not in the first two bytes, move two bytes forward. */
17959 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17960 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17961 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17962 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17963 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17966 /* Emit lea manually to avoid clobbering of flags. */
17967 emit_insn (gen_rtx_SET (SImode, reg2,
17968 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17970 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17971 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17972 emit_insn (gen_rtx_SET (VOIDmode, out,
17973 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Alternate arm (presumably when cmove is unavailable — elided here):
   branch over the two-byte adjustment instead.  */
17980 rtx end_2_label = gen_label_rtx ();
17981 /* Is zero in the first two bytes? */
17983 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17984 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17985 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17986 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17987 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17989 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17990 JUMP_LABEL (tmp) = end_2_label;
17992 /* Not in the first two. Move two bytes forward. */
17993 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17994 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17996 emit_label (end_2_label);
18000 /* Avoid branch in fixing the byte. */
18001 tmpreg = gen_lowpart (QImode, tmpreg);
18002 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18003 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18004 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18006 emit_label (end_0_label);
18009 /* Expand strlen. */
/* Returns nonzero on success (RTL emitted), zero to fall back to the
   library call — exact return points are elided in this excerpt.  */
18012 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18014 rtx addr, scratch1, scratch2, scratch3, scratch4;
18016 /* The generic case of strlen expander is long. Avoid it's
18017 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
18019 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18020 && !TARGET_INLINE_ALL_STRINGOPS
18021 && !optimize_insn_for_size_p ()
18022 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18025 addr = force_reg (Pmode, XEXP (src, 0));
18026 scratch1 = gen_reg_rtx (Pmode);
/* Fast path: unrolled SImode scan via ix86_expand_strlensi_unroll_1.  */
18028 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18029 && !optimize_insn_for_size_p ())
18031 /* Well it seems that some optimizer does not combine a call like
18032 foo(strlen(bar), strlen(bar));
18033 when the move and the subtraction is done here. It does calculate
18034 the length just once when these instructions are done inside of
18035 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18036 often used and I use one fewer register for the lifetime of
18037 output_strlen_unroll() this is better. */
18039 emit_move_insn (out, addr);
18041 ix86_expand_strlensi_unroll_1 (out, src, align);
18043 /* strlensi_unroll_1 returns the address of the zero at the end of
18044 the string, like memchr(), so compute the length by subtracting
18045 the start address. */
18046 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Slow path: use repnz scasb, which requires eax/ecx/edi.  */
18052 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18053 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18056 scratch2 = gen_reg_rtx (Pmode);
18057 scratch3 = gen_reg_rtx (Pmode);
18058 scratch4 = force_reg (Pmode, constm1_rtx);
18060 emit_move_insn (scratch3, addr);
18061 eoschar = force_reg (QImode, eoschar);
18063 src = replace_equiv_address_nv (src, scratch3);
18065 /* If .md starts supporting :P, this can be done in .md. */
18066 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18067 scratch4), UNSPEC_SCAS);
18068 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len + 2) in the counter; complement and add -1 to
   recover the length.  */
18069 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18070 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18075 /* For given symbol (function) construct code to compute address of it's PLT
18076 entry in large x86-64 PIC model. */
/* Result is a fresh Pmode pseudo holding GOT base + @PLTOFF(symbol).  */
18078 construct_plt_address (rtx symbol)
18080 rtx tmp = gen_reg_rtx (Pmode);
18081 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18083 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18084 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18086 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18087 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit RTL for a call.  RETVAL is the value register or NULL for a void
   call; FNADDR is a MEM wrapping the callee address; POP is the number of
   bytes the callee pops (stdcall-style), SIBCALL nonzero for tail calls.
   Several branches/returns are elided in this excerpt.  */
18092 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18093 rtx callarg2 ATTRIBUTE_UNUSED,
18094 rtx pop, int sibcall)
18096 rtx use = NULL, call;
18098 if (pop == const0_rtx)
18100 gcc_assert (!TARGET_64BIT || !pop)
18102 if (TARGET_MACHO && !TARGET_64BIT)
18105 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18106 fnaddr = machopic_indirect_call_target (fnaddr);
18111 /* Static functions and indirect calls don't need the pic register. */
18112 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18113 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18114 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18115 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
18118 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18120 rtx al = gen_rtx_REG (QImode, AX_REG);
18121 emit_move_insn (al, callarg2);
18122 use_reg (&use, al);
/* Large PIC model: route non-local calls through the PLT address.  */
18125 if (ix86_cmodel == CM_LARGE_PIC
18126 && GET_CODE (fnaddr) == MEM
18127 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18128 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18129 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18130 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18132 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18133 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must use a call-clobbered, non-argument register;
   R11 is the designated scratch.  */
18135 if (sibcall && TARGET_64BIT
18136 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18139 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18140 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18141 emit_move_insn (fnaddr, addr);
18142 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18145 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18147 call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold the callee stack pop into the call pattern as a PARALLEL.  */
18150 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18151 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18152 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18155 call = emit_call_insn (call);
18157 CALL_INSN_FUNCTION_USAGE (call) = use;
18161 /* Clear stack slot assignments remembered from previous functions.
18162 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and returns a zeroed per-function machine_function record.  */
18165 static struct machine_function *
18166 ix86_init_machine_status (void)
18168 struct machine_function *f;
18170 f = GGC_CNEW (struct machine_function);
/* -1 marks the prologue/epilogue register count as not-yet-computed.  */
18171 f->use_fast_prologue_epilogue_nregs = -1;
18172 f->tls_descriptor_call_expanded_p = 0;
18173 f->call_abi = DEFAULT_ABI;
18178 /* Return a MEM corresponding to a stack slot with mode MODE.
18179 Allocate a new slot if necessary.
18181 The RTL for a function can have several slots available: N is
18182 which slot to use. */
18185 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18187 struct stack_local_entry *s;
18189 gcc_assert (n < MAX_386_STACK_LOCALS);
18191 /* Virtual slot is valid only before vregs are instantiated. */
18192 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot with matching mode and index; copy_rtx keeps
   the cached RTL unshared.  */
18194 for (s = ix86_stack_locals; s; s = s->next)
18195 if (s->mode == mode && s->n == n)
18196 return copy_rtx (s->rtl);
/* Not cached: allocate a new entry and push it on the list head.  */
18198 s = (struct stack_local_entry *)
18199 ggc_alloc (sizeof (struct stack_local_entry));
18202 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18204 s->next = ix86_stack_locals;
18205 ix86_stack_locals = s;
18209 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18211 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF; GNU TLS dialect uses the
   triple-underscore entry point.  */
18213 ix86_tls_get_addr (void)
18216 if (!ix86_tls_symbol)
18218 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18219 (TARGET_ANY_GNU_TLS
18221 ? "___tls_get_addr"
18222 : "__tls_get_addr");
18225 return ix86_tls_symbol;
18228 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18230 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily created and cached; flagged global-dynamic so the TLS
   relocation machinery treats it correctly.  */
18232 ix86_tls_module_base (void)
18235 if (!ix86_tls_module_base_symbol)
18237 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18238 "_TLS_MODULE_BASE_");
18239 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18240 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18243 return ix86_tls_module_base_symbol;
18246 /* Calculate the length of the memory address in the instruction
18247 encoding. Does not include the one-byte modrm, opcode, or prefix. */
18250 memory_address_length (rtx addr)
18252 struct ix86_address parts;
18253 rtx base, index, disp;
/* Autoincrement addresses encode no extra bytes beyond modrm.  */
18257 if (GET_CODE (addr) == PRE_DEC
18258 || GET_CODE (addr) == POST_INC
18259 || GET_CODE (addr) == PRE_MODIFY
18260 || GET_CODE (addr) == POST_MODIFY)
18263 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the hard-register identity checks below work.  */
18266 if (parts.base && GET_CODE (parts.base) == SUBREG)
18267 parts.base = SUBREG_REG (parts.base);
18268 if (parts.index && GET_CODE (parts.index) == SUBREG)
18269 parts.index = SUBREG_REG (parts.index);
18272 index = parts.index;
/* ModRM/SIB encoding quirks:
18277 - esp as the base always wants an index,
18278 - ebp as the base always wants a displacement. */
18280 /* Register Indirect. */
18281 if (base && !index && !disp)
18283 /* esp (for its index) and ebp (for its displacement) need
18284 the two-byte modrm form. */
18285 if (addr == stack_pointer_rtx
18286 || addr == arg_pointer_rtx
18287 || addr == frame_pointer_rtx
18288 || addr == hard_frame_pointer_rtx)
18292 /* Direct Addressing. */
18293 else if (disp && !base && !index)
18298 /* Find the length of the displacement constant. */
/* satisfies_constraint_K checks for an 8-bit signed immediate
   (short disp8 form).  */
18301 if (base && satisfies_constraint_K (disp))
18306 /* ebp always wants a displacement. */
18307 else if (base == hard_frame_pointer_rtx)
18310 /* An index requires the two-byte modrm form.... */
18312 /* ...like esp, which always wants an index. */
18313 || base == stack_pointer_rtx
18314 || base == arg_pointer_rtx
18315 || base == frame_pointer_rtx)
18322 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18323 is set, expect that insn have 8bit immediate alternative. */
18325 ix86_attr_length_immediate_default (rtx insn, int shortform)
18329 extract_insn_cached (insn);
/* Scan operands for the (single) immediate and size it by insn mode.  */
18330 for (i = recog_data.n_operands - 1; i >= 0; --i)
18331 if (CONSTANT_P (recog_data.operand[i]))
/* Constraint K = signed 8-bit immediate, usable in the short form.  */
18334 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18338 switch (get_attr_mode (insn))
18349 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18354 fatal_insn ("unknown insn mode", insn);
18360 /* Compute default value for "length_address" attribute. */
18362 ix86_attr_length_address_default (rtx insn)
/* LEA's "memory" operand is really its SET_SRC address expression.  */
18366 if (get_attr_type (insn) == TYPE_LEA)
18368 rtx set = PATTERN (insn);
18370 if (GET_CODE (set) == PARALLEL)
18371 set = XVECEXP (set, 0, 0);
18373 gcc_assert (GET_CODE (set) == SET);
18375 return memory_address_length (SET_SRC (set));
/* Otherwise size the first MEM operand found (x86 insns have at most
   one memory operand).  */
18378 extract_insn_cached (insn);
18379 for (i = recog_data.n_operands - 1; i >= 0; --i)
18380 if (MEM_P (recog_data.operand[i]))
18382 return memory_address_length (XEXP (recog_data.operand[i], 0));
18388 /* Compute default value for "length_vex" attribute. It includes
18389 2 or 3 byte VEX prefix and 1 opcode byte. */
18392 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18397 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18398 byte VEX prefix. */
18399 if (!has_0f_opcode || has_vex_w)
18402 /* We can always use 2 byte VEX prefix in 32bit. */
/* 64-bit: scan operands for conditions that force the 3-byte form.  */
18406 extract_insn_cached (insn);
18408 for (i = recog_data.n_operands - 1; i >= 0; --i)
18409 if (REG_P (recog_data.operand[i]))
18411 /* REX.W bit uses 3 byte VEX prefix. */
18412 if (GET_MODE (recog_data.operand[i]) == DImode)
18417 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18418 if (MEM_P (recog_data.operand[i])
18419 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18426 /* Return the maximum number of instructions a cpu can issue. */
/* Dispatches on ix86_tune; per-case return values are elided in this
   excerpt.  */
18429 ix86_issue_rate (void)
18433 case PROCESSOR_PENTIUM:
18437 case PROCESSOR_PENTIUMPRO:
18438 case PROCESSOR_PENTIUM4:
18439 case PROCESSOR_ATHLON:
18441 case PROCESSOR_AMDFAM10:
18442 case PROCESSOR_NOCONA:
18443 case PROCESSOR_GENERIC32:
18444 case PROCESSOR_GENERIC64:
18447 case PROCESSOR_CORE2:
18455 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18456 by DEP_INSN and nothing set by DEP_INSN. */
18459 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18463 /* Simplify the test for uninteresting insns. */
/* Only flag-consuming insn types are worth pairing with the compare.  */
18464 if (insn_type != TYPE_SETCC
18465 && insn_type != TYPE_ICMOV
18466 && insn_type != TYPE_FCMOV
18467 && insn_type != TYPE_IBR)
18470 if ((set = single_set (dep_insn)) != 0)
18472 set = SET_DEST (set);
/* Two-SET PARALLEL: collect both destinations.  */
18475 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18476 && XVECLEN (PATTERN (dep_insn), 0) == 2
18477 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18478 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18480 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* NOTE(review): set2 reads element 0 again, same as set; the intent
   appears to be element 1 (the second SET) — verify against upstream,
   where this was later corrected.  */
18481 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18486 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18489 /* This test is true if the dependent insn reads the flags but
18490 not any other potentially set register. */
18491 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18494 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18500 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18501 address with operands set by DEP_INSN. */
18504 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* LEA computes an address in its SET_SRC even though it has no MEM.  */
18508 if (insn_type == TYPE_LEA
18511 addr = PATTERN (insn);
18513 if (GET_CODE (addr) == PARALLEL)
18514 addr = XVECEXP (addr, 0, 0);
18516 gcc_assert (GET_CODE (addr) == SET);
18518 addr = SET_SRC (addr);
/* Otherwise take the address of the first MEM operand.  */
18523 extract_insn_cached (insn);
18524 for (i = recog_data.n_operands - 1; i >= 0; --i)
18525 if (MEM_P (recog_data.operand[i]))
18527 addr = XEXP (recog_data.operand[i], 0);
/* Dependent iff DEP_INSN writes anything the address reads.  */
18534 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.
   Several returns/case labels are elided in this excerpt.  */
18538 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18540 enum attr_type insn_type, dep_insn_type;
18541 enum attr_memory memory;
18543 int dep_insn_code_number;
18545 /* Anti and output dependencies have zero cost on all CPUs. */
18546 if (REG_NOTE_KIND (link) != 0)
18549 dep_insn_code_number = recog_memoized (dep_insn);
18551 /* If we can't recognize the insns, we can't really do anything. */
18552 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18555 insn_type = get_attr_type (insn);
18556 dep_insn_type = get_attr_type (dep_insn);
18560 case PROCESSOR_PENTIUM:
18561 /* Address Generation Interlock adds a cycle of latency. */
18562 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18565 /* ??? Compares pair with jump/setcc. */
18566 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18569 /* Floating point stores require value to be ready one cycle earlier. */
18570 if (insn_type == TYPE_FMOV
18571 && get_attr_memory (insn) == MEMORY_STORE
18572 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18576 case PROCESSOR_PENTIUMPRO:
18577 memory = get_attr_memory (insn);
18579 /* INT->FP conversion is expensive. */
18580 if (get_attr_fp_int_src (dep_insn))
18583 /* There is one cycle extra latency between an FP op and a store. */
18584 if (insn_type == TYPE_FMOV
18585 && (set = single_set (dep_insn)) != NULL_RTX
18586 && (set2 = single_set (insn)) != NULL_RTX
18587 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18588 && MEM_P (SET_DEST (set2)))
18591 /* Show ability of reorder buffer to hide latency of load by executing
18592 in parallel with previous instruction in case
18593 previous instruction is not needed to compute the address. */
18594 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18595 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18597 /* Claim moves to take one cycle, as core can issue one load
18598 at time and the next load can start cycle later. */
18599 if (dep_insn_type == TYPE_IMOV
18600 || dep_insn_type == TYPE_FMOV)
/* Next case (processor elided in this excerpt).  */
18608 memory = get_attr_memory (insn);
18610 /* The esp dependency is resolved before the instruction is really
18612 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18613 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18616 /* INT->FP conversion is expensive. */
18617 if (get_attr_fp_int_src (dep_insn))
18620 /* Show ability of reorder buffer to hide latency of load by executing
18621 in parallel with previous instruction in case
18622 previous instruction is not needed to compute the address. */
18623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18624 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18626 /* Claim moves to take one cycle, as core can issue one load
18627 at time and the next load can start cycle later. */
18628 if (dep_insn_type == TYPE_IMOV
18629 || dep_insn_type == TYPE_FMOV)
18638 case PROCESSOR_ATHLON:
18640 case PROCESSOR_AMDFAM10:
18641 case PROCESSOR_GENERIC32:
18642 case PROCESSOR_GENERIC64:
18643 memory = get_attr_memory (insn);
18645 /* Show ability of reorder buffer to hide latency of load by executing
18646 in parallel with previous instruction in case
18647 previous instruction is not needed to compute the address. */
18648 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18649 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18651 enum attr_unit unit = get_attr_unit (insn);
18654 /* Because of the difference between the length of integer and
18655 floating unit pipeline preparation stages, the memory operands
18656 for floating point are cheaper.
18658 ??? For Athlon it the difference is most probably 2. */
18659 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18662 loadcost = TARGET_ATHLON ? 2 : 0;
18664 if (cost >= loadcost)
18677 /* How many alternative schedules to try. This should be as wide as the
18678 scheduling freedom in the DFA, but no wider. Making this value too
18679 large results extra work for the scheduler. */
/* Dispatches on ix86_tune; the returned values are elided here.  */
18682 ia32_multipass_dfa_lookahead (void)
18686 case PROCESSOR_PENTIUM:
18689 case PROCESSOR_PENTIUMPRO:
18699 /* Compute the alignment given to a constant that is being placed in memory.
18700 EXP is the constant and ALIGN is the alignment that the object would
18702 The value of this function is used instead of that alignment to align
/* Returns the (possibly raised) alignment in bits.  */
18706 ix86_constant_alignment (tree exp, int align)
18708 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18709 || TREE_CODE (exp) == INTEGER_CST)
18711 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18713 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants are word-aligned to speed up block copies.  */
18716 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18717 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18718 return BITS_PER_WORD;
18723 /* Compute the alignment for a static variable.
18724 TYPE is the data type, and ALIGN is the alignment that
18725 the object would ordinarily have. The value of this function is used
18726 instead of that alignment to align the object. */
18729 ix86_data_alignment (tree type, int align)
/* Cap the boost: word alignment when optimizing for size, else up to
   256 bits (bounded by what the object format supports).  */
18731 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
18733 if (AGGREGATE_TYPE_P (type)
18734 && TYPE_SIZE (type)
18735 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18736 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18737 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18738 && align < max_align)
18741 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18742 to 16byte boundary. */
18745 if (AGGREGATE_TYPE_P (type)
18746 && TYPE_SIZE (type)
18747 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18748 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18749 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-kind boosts below mirror ix86_local_alignment.  */
18753 if (TREE_CODE (type) == ARRAY_TYPE)
18755 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18757 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18760 else if (TREE_CODE (type) == COMPLEX_TYPE)
18763 if (TYPE_MODE (type) == DCmode && align < 64)
18765 if ((TYPE_MODE (type) == XCmode
18766 || TYPE_MODE (type) == TCmode) && align < 128)
18769 else if ((TREE_CODE (type) == RECORD_TYPE
18770 || TREE_CODE (type) == UNION_TYPE
18771 || TREE_CODE (type) == QUAL_UNION_TYPE)
18772 && TYPE_FIELDS (type))
18774 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18776 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18779 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18780 || TREE_CODE (type) == INTEGER_TYPE)
18782 if (TYPE_MODE (type) == DFmode && align < 64)
18784 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18791 /* Compute the alignment for a local variable or a stack slot. TYPE is
18792 the data type, MODE is the widest mode available and ALIGN is the
18793 alignment that the object would ordinarily have. The value of this
18794 macro is used instead of that alignment to align the object. */
18797 ix86_local_alignment (tree type, enum machine_mode mode,
18798 unsigned int align)
18800 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18801 register in MODE. We will return the largest alignment of XF
/* XFmode caller-save slots get at least DFmode alignment.  */
18805 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18806 align = GET_MODE_ALIGNMENT (DFmode);
18810 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18811 to 16byte boundary. */
18814 if (AGGREGATE_TYPE_P (type)
18815 && TYPE_SIZE (type)
18816 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18817 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18818 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-kind boosts below mirror ix86_data_alignment.  */
18821 if (TREE_CODE (type) == ARRAY_TYPE)
18823 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18825 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18828 else if (TREE_CODE (type) == COMPLEX_TYPE)
18830 if (TYPE_MODE (type) == DCmode && align < 64)
18832 if ((TYPE_MODE (type) == XCmode
18833 || TYPE_MODE (type) == TCmode) && align < 128)
18836 else if ((TREE_CODE (type) == RECORD_TYPE
18837 || TREE_CODE (type) == UNION_TYPE
18838 || TREE_CODE (type) == QUAL_UNION_TYPE)
18839 && TYPE_FIELDS (type))
18841 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18843 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18846 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18847 || TREE_CODE (type) == INTEGER_TYPE)
18850 if (TYPE_MODE (type) == DFmode && align < 64)
18852 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18858 /* Emit RTL insns to initialize the variable parts of a trampoline.
18859 FNADDR is an RTX for the address of the function's pure code.
18860 CXT is an RTX for the static chain value for the function. */
18862 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: "movl $cxt, %ecx" (b9 imm32) followed by
   "jmp rel32" (e9 disp32) — 10 bytes total.  */
18866 /* Compute offset from the end of the jmp to the target function. */
18867 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18868 plus_constant (tramp, 10),
18869 NULL_RTX, 1, OPTAB_DIRECT);
18870 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18871 gen_int_mode (0xb9, QImode));
18872 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18873 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18874 gen_int_mode (0xe9, QImode));
18875 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit layout: load FNADDR into r11, CXT into r10, then jmp *%r11.  */
18880 /* Try to load address using shorter movl instead of movabs.
18881 We may want to support movq for kernel mode, but kernel does not use
18882 trampolines at the moment. */
18883 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18885 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 41 bb = movl imm32, %r11d (zero-extends into r11).  */
18886 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18887 gen_int_mode (0xbb41, HImode));
18888 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18889 gen_lowpart (SImode, fnaddr));
/* 49 bb = movabs imm64, %r11.  */
18894 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18895 gen_int_mode (0xbb49, HImode));
18896 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18900 /* Load static chain using movabs to r10. */
18901 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18902 gen_int_mode (0xba49, HImode));
18903 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18906 /* Jump to the r11 */
18907 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18908 gen_int_mode (0xff49, HImode));
18909 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18910 gen_int_mode (0xe3, QImode));
18912 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On systems with non-executable stacks, mark the trampoline page
   executable at runtime.  */
18915 #ifdef ENABLE_EXECUTE_STACK
18916 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18917 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18921 /* Codes for all the SSE/MMX builtins. */
18924 IX86_BUILTIN_ADDPS,
18925 IX86_BUILTIN_ADDSS,
18926 IX86_BUILTIN_DIVPS,
18927 IX86_BUILTIN_DIVSS,
18928 IX86_BUILTIN_MULPS,
18929 IX86_BUILTIN_MULSS,
18930 IX86_BUILTIN_SUBPS,
18931 IX86_BUILTIN_SUBSS,
18933 IX86_BUILTIN_CMPEQPS,
18934 IX86_BUILTIN_CMPLTPS,
18935 IX86_BUILTIN_CMPLEPS,
18936 IX86_BUILTIN_CMPGTPS,
18937 IX86_BUILTIN_CMPGEPS,
18938 IX86_BUILTIN_CMPNEQPS,
18939 IX86_BUILTIN_CMPNLTPS,
18940 IX86_BUILTIN_CMPNLEPS,
18941 IX86_BUILTIN_CMPNGTPS,
18942 IX86_BUILTIN_CMPNGEPS,
18943 IX86_BUILTIN_CMPORDPS,
18944 IX86_BUILTIN_CMPUNORDPS,
18945 IX86_BUILTIN_CMPEQSS,
18946 IX86_BUILTIN_CMPLTSS,
18947 IX86_BUILTIN_CMPLESS,
18948 IX86_BUILTIN_CMPNEQSS,
18949 IX86_BUILTIN_CMPNLTSS,
18950 IX86_BUILTIN_CMPNLESS,
18951 IX86_BUILTIN_CMPNGTSS,
18952 IX86_BUILTIN_CMPNGESS,
18953 IX86_BUILTIN_CMPORDSS,
18954 IX86_BUILTIN_CMPUNORDSS,
18956 IX86_BUILTIN_COMIEQSS,
18957 IX86_BUILTIN_COMILTSS,
18958 IX86_BUILTIN_COMILESS,
18959 IX86_BUILTIN_COMIGTSS,
18960 IX86_BUILTIN_COMIGESS,
18961 IX86_BUILTIN_COMINEQSS,
18962 IX86_BUILTIN_UCOMIEQSS,
18963 IX86_BUILTIN_UCOMILTSS,
18964 IX86_BUILTIN_UCOMILESS,
18965 IX86_BUILTIN_UCOMIGTSS,
18966 IX86_BUILTIN_UCOMIGESS,
18967 IX86_BUILTIN_UCOMINEQSS,
18969 IX86_BUILTIN_CVTPI2PS,
18970 IX86_BUILTIN_CVTPS2PI,
18971 IX86_BUILTIN_CVTSI2SS,
18972 IX86_BUILTIN_CVTSI642SS,
18973 IX86_BUILTIN_CVTSS2SI,
18974 IX86_BUILTIN_CVTSS2SI64,
18975 IX86_BUILTIN_CVTTPS2PI,
18976 IX86_BUILTIN_CVTTSS2SI,
18977 IX86_BUILTIN_CVTTSS2SI64,
18979 IX86_BUILTIN_MAXPS,
18980 IX86_BUILTIN_MAXSS,
18981 IX86_BUILTIN_MINPS,
18982 IX86_BUILTIN_MINSS,
18984 IX86_BUILTIN_LOADUPS,
18985 IX86_BUILTIN_STOREUPS,
18986 IX86_BUILTIN_MOVSS,
18988 IX86_BUILTIN_MOVHLPS,
18989 IX86_BUILTIN_MOVLHPS,
18990 IX86_BUILTIN_LOADHPS,
18991 IX86_BUILTIN_LOADLPS,
18992 IX86_BUILTIN_STOREHPS,
18993 IX86_BUILTIN_STORELPS,
18995 IX86_BUILTIN_MASKMOVQ,
18996 IX86_BUILTIN_MOVMSKPS,
18997 IX86_BUILTIN_PMOVMSKB,
18999 IX86_BUILTIN_MOVNTPS,
19000 IX86_BUILTIN_MOVNTQ,
19002 IX86_BUILTIN_LOADDQU,
19003 IX86_BUILTIN_STOREDQU,
19005 IX86_BUILTIN_PACKSSWB,
19006 IX86_BUILTIN_PACKSSDW,
19007 IX86_BUILTIN_PACKUSWB,
19009 IX86_BUILTIN_PADDB,
19010 IX86_BUILTIN_PADDW,
19011 IX86_BUILTIN_PADDD,
19012 IX86_BUILTIN_PADDQ,
19013 IX86_BUILTIN_PADDSB,
19014 IX86_BUILTIN_PADDSW,
19015 IX86_BUILTIN_PADDUSB,
19016 IX86_BUILTIN_PADDUSW,
19017 IX86_BUILTIN_PSUBB,
19018 IX86_BUILTIN_PSUBW,
19019 IX86_BUILTIN_PSUBD,
19020 IX86_BUILTIN_PSUBQ,
19021 IX86_BUILTIN_PSUBSB,
19022 IX86_BUILTIN_PSUBSW,
19023 IX86_BUILTIN_PSUBUSB,
19024 IX86_BUILTIN_PSUBUSW,
19027 IX86_BUILTIN_PANDN,
19031 IX86_BUILTIN_PAVGB,
19032 IX86_BUILTIN_PAVGW,
19034 IX86_BUILTIN_PCMPEQB,
19035 IX86_BUILTIN_PCMPEQW,
19036 IX86_BUILTIN_PCMPEQD,
19037 IX86_BUILTIN_PCMPGTB,
19038 IX86_BUILTIN_PCMPGTW,
19039 IX86_BUILTIN_PCMPGTD,
19041 IX86_BUILTIN_PMADDWD,
19043 IX86_BUILTIN_PMAXSW,
19044 IX86_BUILTIN_PMAXUB,
19045 IX86_BUILTIN_PMINSW,
19046 IX86_BUILTIN_PMINUB,
19048 IX86_BUILTIN_PMULHUW,
19049 IX86_BUILTIN_PMULHW,
19050 IX86_BUILTIN_PMULLW,
19052 IX86_BUILTIN_PSADBW,
19053 IX86_BUILTIN_PSHUFW,
19055 IX86_BUILTIN_PSLLW,
19056 IX86_BUILTIN_PSLLD,
19057 IX86_BUILTIN_PSLLQ,
19058 IX86_BUILTIN_PSRAW,
19059 IX86_BUILTIN_PSRAD,
19060 IX86_BUILTIN_PSRLW,
19061 IX86_BUILTIN_PSRLD,
19062 IX86_BUILTIN_PSRLQ,
19063 IX86_BUILTIN_PSLLWI,
19064 IX86_BUILTIN_PSLLDI,
19065 IX86_BUILTIN_PSLLQI,
19066 IX86_BUILTIN_PSRAWI,
19067 IX86_BUILTIN_PSRADI,
19068 IX86_BUILTIN_PSRLWI,
19069 IX86_BUILTIN_PSRLDI,
19070 IX86_BUILTIN_PSRLQI,
19072 IX86_BUILTIN_PUNPCKHBW,
19073 IX86_BUILTIN_PUNPCKHWD,
19074 IX86_BUILTIN_PUNPCKHDQ,
19075 IX86_BUILTIN_PUNPCKLBW,
19076 IX86_BUILTIN_PUNPCKLWD,
19077 IX86_BUILTIN_PUNPCKLDQ,
19079 IX86_BUILTIN_SHUFPS,
19081 IX86_BUILTIN_RCPPS,
19082 IX86_BUILTIN_RCPSS,
19083 IX86_BUILTIN_RSQRTPS,
19084 IX86_BUILTIN_RSQRTPS_NR,
19085 IX86_BUILTIN_RSQRTSS,
19086 IX86_BUILTIN_RSQRTF,
19087 IX86_BUILTIN_SQRTPS,
19088 IX86_BUILTIN_SQRTPS_NR,
19089 IX86_BUILTIN_SQRTSS,
19091 IX86_BUILTIN_UNPCKHPS,
19092 IX86_BUILTIN_UNPCKLPS,
19094 IX86_BUILTIN_ANDPS,
19095 IX86_BUILTIN_ANDNPS,
19097 IX86_BUILTIN_XORPS,
19100 IX86_BUILTIN_LDMXCSR,
19101 IX86_BUILTIN_STMXCSR,
19102 IX86_BUILTIN_SFENCE,
19104 /* 3DNow! Original */
19105 IX86_BUILTIN_FEMMS,
19106 IX86_BUILTIN_PAVGUSB,
19107 IX86_BUILTIN_PF2ID,
19108 IX86_BUILTIN_PFACC,
19109 IX86_BUILTIN_PFADD,
19110 IX86_BUILTIN_PFCMPEQ,
19111 IX86_BUILTIN_PFCMPGE,
19112 IX86_BUILTIN_PFCMPGT,
19113 IX86_BUILTIN_PFMAX,
19114 IX86_BUILTIN_PFMIN,
19115 IX86_BUILTIN_PFMUL,
19116 IX86_BUILTIN_PFRCP,
19117 IX86_BUILTIN_PFRCPIT1,
19118 IX86_BUILTIN_PFRCPIT2,
19119 IX86_BUILTIN_PFRSQIT1,
19120 IX86_BUILTIN_PFRSQRT,
19121 IX86_BUILTIN_PFSUB,
19122 IX86_BUILTIN_PFSUBR,
19123 IX86_BUILTIN_PI2FD,
19124 IX86_BUILTIN_PMULHRW,
19126 /* 3DNow! Athlon Extensions */
19127 IX86_BUILTIN_PF2IW,
19128 IX86_BUILTIN_PFNACC,
19129 IX86_BUILTIN_PFPNACC,
19130 IX86_BUILTIN_PI2FW,
19131 IX86_BUILTIN_PSWAPDSI,
19132 IX86_BUILTIN_PSWAPDSF,
19135 IX86_BUILTIN_ADDPD,
19136 IX86_BUILTIN_ADDSD,
19137 IX86_BUILTIN_DIVPD,
19138 IX86_BUILTIN_DIVSD,
19139 IX86_BUILTIN_MULPD,
19140 IX86_BUILTIN_MULSD,
19141 IX86_BUILTIN_SUBPD,
19142 IX86_BUILTIN_SUBSD,
19144 IX86_BUILTIN_CMPEQPD,
19145 IX86_BUILTIN_CMPLTPD,
19146 IX86_BUILTIN_CMPLEPD,
19147 IX86_BUILTIN_CMPGTPD,
19148 IX86_BUILTIN_CMPGEPD,
19149 IX86_BUILTIN_CMPNEQPD,
19150 IX86_BUILTIN_CMPNLTPD,
19151 IX86_BUILTIN_CMPNLEPD,
19152 IX86_BUILTIN_CMPNGTPD,
19153 IX86_BUILTIN_CMPNGEPD,
19154 IX86_BUILTIN_CMPORDPD,
19155 IX86_BUILTIN_CMPUNORDPD,
19156 IX86_BUILTIN_CMPEQSD,
19157 IX86_BUILTIN_CMPLTSD,
19158 IX86_BUILTIN_CMPLESD,
19159 IX86_BUILTIN_CMPNEQSD,
19160 IX86_BUILTIN_CMPNLTSD,
19161 IX86_BUILTIN_CMPNLESD,
19162 IX86_BUILTIN_CMPORDSD,
19163 IX86_BUILTIN_CMPUNORDSD,
19165 IX86_BUILTIN_COMIEQSD,
19166 IX86_BUILTIN_COMILTSD,
19167 IX86_BUILTIN_COMILESD,
19168 IX86_BUILTIN_COMIGTSD,
19169 IX86_BUILTIN_COMIGESD,
19170 IX86_BUILTIN_COMINEQSD,
19171 IX86_BUILTIN_UCOMIEQSD,
19172 IX86_BUILTIN_UCOMILTSD,
19173 IX86_BUILTIN_UCOMILESD,
19174 IX86_BUILTIN_UCOMIGTSD,
19175 IX86_BUILTIN_UCOMIGESD,
19176 IX86_BUILTIN_UCOMINEQSD,
19178 IX86_BUILTIN_MAXPD,
19179 IX86_BUILTIN_MAXSD,
19180 IX86_BUILTIN_MINPD,
19181 IX86_BUILTIN_MINSD,
19183 IX86_BUILTIN_ANDPD,
19184 IX86_BUILTIN_ANDNPD,
19186 IX86_BUILTIN_XORPD,
19188 IX86_BUILTIN_SQRTPD,
19189 IX86_BUILTIN_SQRTSD,
19191 IX86_BUILTIN_UNPCKHPD,
19192 IX86_BUILTIN_UNPCKLPD,
19194 IX86_BUILTIN_SHUFPD,
19196 IX86_BUILTIN_LOADUPD,
19197 IX86_BUILTIN_STOREUPD,
19198 IX86_BUILTIN_MOVSD,
19200 IX86_BUILTIN_LOADHPD,
19201 IX86_BUILTIN_LOADLPD,
19203 IX86_BUILTIN_CVTDQ2PD,
19204 IX86_BUILTIN_CVTDQ2PS,
19206 IX86_BUILTIN_CVTPD2DQ,
19207 IX86_BUILTIN_CVTPD2PI,
19208 IX86_BUILTIN_CVTPD2PS,
19209 IX86_BUILTIN_CVTTPD2DQ,
19210 IX86_BUILTIN_CVTTPD2PI,
19212 IX86_BUILTIN_CVTPI2PD,
19213 IX86_BUILTIN_CVTSI2SD,
19214 IX86_BUILTIN_CVTSI642SD,
19216 IX86_BUILTIN_CVTSD2SI,
19217 IX86_BUILTIN_CVTSD2SI64,
19218 IX86_BUILTIN_CVTSD2SS,
19219 IX86_BUILTIN_CVTSS2SD,
19220 IX86_BUILTIN_CVTTSD2SI,
19221 IX86_BUILTIN_CVTTSD2SI64,
19223 IX86_BUILTIN_CVTPS2DQ,
19224 IX86_BUILTIN_CVTPS2PD,
19225 IX86_BUILTIN_CVTTPS2DQ,
19227 IX86_BUILTIN_MOVNTI,
19228 IX86_BUILTIN_MOVNTPD,
19229 IX86_BUILTIN_MOVNTDQ,
19231 IX86_BUILTIN_MOVQ128,
19234 IX86_BUILTIN_MASKMOVDQU,
19235 IX86_BUILTIN_MOVMSKPD,
19236 IX86_BUILTIN_PMOVMSKB128,
19238 IX86_BUILTIN_PACKSSWB128,
19239 IX86_BUILTIN_PACKSSDW128,
19240 IX86_BUILTIN_PACKUSWB128,
19242 IX86_BUILTIN_PADDB128,
19243 IX86_BUILTIN_PADDW128,
19244 IX86_BUILTIN_PADDD128,
19245 IX86_BUILTIN_PADDQ128,
19246 IX86_BUILTIN_PADDSB128,
19247 IX86_BUILTIN_PADDSW128,
19248 IX86_BUILTIN_PADDUSB128,
19249 IX86_BUILTIN_PADDUSW128,
19250 IX86_BUILTIN_PSUBB128,
19251 IX86_BUILTIN_PSUBW128,
19252 IX86_BUILTIN_PSUBD128,
19253 IX86_BUILTIN_PSUBQ128,
19254 IX86_BUILTIN_PSUBSB128,
19255 IX86_BUILTIN_PSUBSW128,
19256 IX86_BUILTIN_PSUBUSB128,
19257 IX86_BUILTIN_PSUBUSW128,
19259 IX86_BUILTIN_PAND128,
19260 IX86_BUILTIN_PANDN128,
19261 IX86_BUILTIN_POR128,
19262 IX86_BUILTIN_PXOR128,
19264 IX86_BUILTIN_PAVGB128,
19265 IX86_BUILTIN_PAVGW128,
19267 IX86_BUILTIN_PCMPEQB128,
19268 IX86_BUILTIN_PCMPEQW128,
19269 IX86_BUILTIN_PCMPEQD128,
19270 IX86_BUILTIN_PCMPGTB128,
19271 IX86_BUILTIN_PCMPGTW128,
19272 IX86_BUILTIN_PCMPGTD128,
19274 IX86_BUILTIN_PMADDWD128,
19276 IX86_BUILTIN_PMAXSW128,
19277 IX86_BUILTIN_PMAXUB128,
19278 IX86_BUILTIN_PMINSW128,
19279 IX86_BUILTIN_PMINUB128,
19281 IX86_BUILTIN_PMULUDQ,
19282 IX86_BUILTIN_PMULUDQ128,
19283 IX86_BUILTIN_PMULHUW128,
19284 IX86_BUILTIN_PMULHW128,
19285 IX86_BUILTIN_PMULLW128,
19287 IX86_BUILTIN_PSADBW128,
19288 IX86_BUILTIN_PSHUFHW,
19289 IX86_BUILTIN_PSHUFLW,
19290 IX86_BUILTIN_PSHUFD,
19292 IX86_BUILTIN_PSLLDQI128,
19293 IX86_BUILTIN_PSLLWI128,
19294 IX86_BUILTIN_PSLLDI128,
19295 IX86_BUILTIN_PSLLQI128,
19296 IX86_BUILTIN_PSRAWI128,
19297 IX86_BUILTIN_PSRADI128,
19298 IX86_BUILTIN_PSRLDQI128,
19299 IX86_BUILTIN_PSRLWI128,
19300 IX86_BUILTIN_PSRLDI128,
19301 IX86_BUILTIN_PSRLQI128,
19303 IX86_BUILTIN_PSLLDQ128,
19304 IX86_BUILTIN_PSLLW128,
19305 IX86_BUILTIN_PSLLD128,
19306 IX86_BUILTIN_PSLLQ128,
19307 IX86_BUILTIN_PSRAW128,
19308 IX86_BUILTIN_PSRAD128,
19309 IX86_BUILTIN_PSRLW128,
19310 IX86_BUILTIN_PSRLD128,
19311 IX86_BUILTIN_PSRLQ128,
19313 IX86_BUILTIN_PUNPCKHBW128,
19314 IX86_BUILTIN_PUNPCKHWD128,
19315 IX86_BUILTIN_PUNPCKHDQ128,
19316 IX86_BUILTIN_PUNPCKHQDQ128,
19317 IX86_BUILTIN_PUNPCKLBW128,
19318 IX86_BUILTIN_PUNPCKLWD128,
19319 IX86_BUILTIN_PUNPCKLDQ128,
19320 IX86_BUILTIN_PUNPCKLQDQ128,
19322 IX86_BUILTIN_CLFLUSH,
19323 IX86_BUILTIN_MFENCE,
19324 IX86_BUILTIN_LFENCE,
19327 IX86_BUILTIN_ADDSUBPS,
19328 IX86_BUILTIN_HADDPS,
19329 IX86_BUILTIN_HSUBPS,
19330 IX86_BUILTIN_MOVSHDUP,
19331 IX86_BUILTIN_MOVSLDUP,
19332 IX86_BUILTIN_ADDSUBPD,
19333 IX86_BUILTIN_HADDPD,
19334 IX86_BUILTIN_HSUBPD,
19335 IX86_BUILTIN_LDDQU,
19337 IX86_BUILTIN_MONITOR,
19338 IX86_BUILTIN_MWAIT,
19341 IX86_BUILTIN_PHADDW,
19342 IX86_BUILTIN_PHADDD,
19343 IX86_BUILTIN_PHADDSW,
19344 IX86_BUILTIN_PHSUBW,
19345 IX86_BUILTIN_PHSUBD,
19346 IX86_BUILTIN_PHSUBSW,
19347 IX86_BUILTIN_PMADDUBSW,
19348 IX86_BUILTIN_PMULHRSW,
19349 IX86_BUILTIN_PSHUFB,
19350 IX86_BUILTIN_PSIGNB,
19351 IX86_BUILTIN_PSIGNW,
19352 IX86_BUILTIN_PSIGND,
19353 IX86_BUILTIN_PALIGNR,
19354 IX86_BUILTIN_PABSB,
19355 IX86_BUILTIN_PABSW,
19356 IX86_BUILTIN_PABSD,
19358 IX86_BUILTIN_PHADDW128,
19359 IX86_BUILTIN_PHADDD128,
19360 IX86_BUILTIN_PHADDSW128,
19361 IX86_BUILTIN_PHSUBW128,
19362 IX86_BUILTIN_PHSUBD128,
19363 IX86_BUILTIN_PHSUBSW128,
19364 IX86_BUILTIN_PMADDUBSW128,
19365 IX86_BUILTIN_PMULHRSW128,
19366 IX86_BUILTIN_PSHUFB128,
19367 IX86_BUILTIN_PSIGNB128,
19368 IX86_BUILTIN_PSIGNW128,
19369 IX86_BUILTIN_PSIGND128,
19370 IX86_BUILTIN_PALIGNR128,
19371 IX86_BUILTIN_PABSB128,
19372 IX86_BUILTIN_PABSW128,
19373 IX86_BUILTIN_PABSD128,
19375 /* AMDFAM10 - SSE4A New Instructions. */
19376 IX86_BUILTIN_MOVNTSD,
19377 IX86_BUILTIN_MOVNTSS,
19378 IX86_BUILTIN_EXTRQI,
19379 IX86_BUILTIN_EXTRQ,
19380 IX86_BUILTIN_INSERTQI,
19381 IX86_BUILTIN_INSERTQ,
19384 IX86_BUILTIN_BLENDPD,
19385 IX86_BUILTIN_BLENDPS,
19386 IX86_BUILTIN_BLENDVPD,
19387 IX86_BUILTIN_BLENDVPS,
19388 IX86_BUILTIN_PBLENDVB128,
19389 IX86_BUILTIN_PBLENDW128,
19394 IX86_BUILTIN_INSERTPS128,
19396 IX86_BUILTIN_MOVNTDQA,
19397 IX86_BUILTIN_MPSADBW128,
19398 IX86_BUILTIN_PACKUSDW128,
19399 IX86_BUILTIN_PCMPEQQ,
19400 IX86_BUILTIN_PHMINPOSUW128,
19402 IX86_BUILTIN_PMAXSB128,
19403 IX86_BUILTIN_PMAXSD128,
19404 IX86_BUILTIN_PMAXUD128,
19405 IX86_BUILTIN_PMAXUW128,
19407 IX86_BUILTIN_PMINSB128,
19408 IX86_BUILTIN_PMINSD128,
19409 IX86_BUILTIN_PMINUD128,
19410 IX86_BUILTIN_PMINUW128,
19412 IX86_BUILTIN_PMOVSXBW128,
19413 IX86_BUILTIN_PMOVSXBD128,
19414 IX86_BUILTIN_PMOVSXBQ128,
19415 IX86_BUILTIN_PMOVSXWD128,
19416 IX86_BUILTIN_PMOVSXWQ128,
19417 IX86_BUILTIN_PMOVSXDQ128,
19419 IX86_BUILTIN_PMOVZXBW128,
19420 IX86_BUILTIN_PMOVZXBD128,
19421 IX86_BUILTIN_PMOVZXBQ128,
19422 IX86_BUILTIN_PMOVZXWD128,
19423 IX86_BUILTIN_PMOVZXWQ128,
19424 IX86_BUILTIN_PMOVZXDQ128,
19426 IX86_BUILTIN_PMULDQ128,
19427 IX86_BUILTIN_PMULLD128,
19429 IX86_BUILTIN_ROUNDPD,
19430 IX86_BUILTIN_ROUNDPS,
19431 IX86_BUILTIN_ROUNDSD,
19432 IX86_BUILTIN_ROUNDSS,
19434 IX86_BUILTIN_PTESTZ,
19435 IX86_BUILTIN_PTESTC,
19436 IX86_BUILTIN_PTESTNZC,
19438 IX86_BUILTIN_VEC_INIT_V2SI,
19439 IX86_BUILTIN_VEC_INIT_V4HI,
19440 IX86_BUILTIN_VEC_INIT_V8QI,
19441 IX86_BUILTIN_VEC_EXT_V2DF,
19442 IX86_BUILTIN_VEC_EXT_V2DI,
19443 IX86_BUILTIN_VEC_EXT_V4SF,
19444 IX86_BUILTIN_VEC_EXT_V4SI,
19445 IX86_BUILTIN_VEC_EXT_V8HI,
19446 IX86_BUILTIN_VEC_EXT_V2SI,
19447 IX86_BUILTIN_VEC_EXT_V4HI,
19448 IX86_BUILTIN_VEC_EXT_V16QI,
19449 IX86_BUILTIN_VEC_SET_V2DI,
19450 IX86_BUILTIN_VEC_SET_V4SF,
19451 IX86_BUILTIN_VEC_SET_V4SI,
19452 IX86_BUILTIN_VEC_SET_V8HI,
19453 IX86_BUILTIN_VEC_SET_V4HI,
19454 IX86_BUILTIN_VEC_SET_V16QI,
19456 IX86_BUILTIN_VEC_PACK_SFIX,
19459 IX86_BUILTIN_CRC32QI,
19460 IX86_BUILTIN_CRC32HI,
19461 IX86_BUILTIN_CRC32SI,
19462 IX86_BUILTIN_CRC32DI,
19464 IX86_BUILTIN_PCMPESTRI128,
19465 IX86_BUILTIN_PCMPESTRM128,
19466 IX86_BUILTIN_PCMPESTRA128,
19467 IX86_BUILTIN_PCMPESTRC128,
19468 IX86_BUILTIN_PCMPESTRO128,
19469 IX86_BUILTIN_PCMPESTRS128,
19470 IX86_BUILTIN_PCMPESTRZ128,
19471 IX86_BUILTIN_PCMPISTRI128,
19472 IX86_BUILTIN_PCMPISTRM128,
19473 IX86_BUILTIN_PCMPISTRA128,
19474 IX86_BUILTIN_PCMPISTRC128,
19475 IX86_BUILTIN_PCMPISTRO128,
19476 IX86_BUILTIN_PCMPISTRS128,
19477 IX86_BUILTIN_PCMPISTRZ128,
19479 IX86_BUILTIN_PCMPGTQ,
19481 /* AES instructions */
19482 IX86_BUILTIN_AESENC128,
19483 IX86_BUILTIN_AESENCLAST128,
19484 IX86_BUILTIN_AESDEC128,
19485 IX86_BUILTIN_AESDECLAST128,
19486 IX86_BUILTIN_AESIMC128,
19487 IX86_BUILTIN_AESKEYGENASSIST128,
19489 /* PCLMUL instruction */
19490 IX86_BUILTIN_PCLMULQDQ128,
19493 IX86_BUILTIN_ADDPD256,
19494 IX86_BUILTIN_ADDPS256,
19495 IX86_BUILTIN_ADDSUBPD256,
19496 IX86_BUILTIN_ADDSUBPS256,
19497 IX86_BUILTIN_ANDPD256,
19498 IX86_BUILTIN_ANDPS256,
19499 IX86_BUILTIN_ANDNPD256,
19500 IX86_BUILTIN_ANDNPS256,
19501 IX86_BUILTIN_BLENDPD256,
19502 IX86_BUILTIN_BLENDPS256,
19503 IX86_BUILTIN_BLENDVPD256,
19504 IX86_BUILTIN_BLENDVPS256,
19505 IX86_BUILTIN_DIVPD256,
19506 IX86_BUILTIN_DIVPS256,
19507 IX86_BUILTIN_DPPS256,
19508 IX86_BUILTIN_HADDPD256,
19509 IX86_BUILTIN_HADDPS256,
19510 IX86_BUILTIN_HSUBPD256,
19511 IX86_BUILTIN_HSUBPS256,
19512 IX86_BUILTIN_MAXPD256,
19513 IX86_BUILTIN_MAXPS256,
19514 IX86_BUILTIN_MINPD256,
19515 IX86_BUILTIN_MINPS256,
19516 IX86_BUILTIN_MULPD256,
19517 IX86_BUILTIN_MULPS256,
19518 IX86_BUILTIN_ORPD256,
19519 IX86_BUILTIN_ORPS256,
19520 IX86_BUILTIN_SHUFPD256,
19521 IX86_BUILTIN_SHUFPS256,
19522 IX86_BUILTIN_SUBPD256,
19523 IX86_BUILTIN_SUBPS256,
19524 IX86_BUILTIN_XORPD256,
19525 IX86_BUILTIN_XORPS256,
19526 IX86_BUILTIN_CMPSD,
19527 IX86_BUILTIN_CMPSS,
19528 IX86_BUILTIN_CMPPD,
19529 IX86_BUILTIN_CMPPS,
19530 IX86_BUILTIN_CMPPD256,
19531 IX86_BUILTIN_CMPPS256,
19532 IX86_BUILTIN_CVTDQ2PD256,
19533 IX86_BUILTIN_CVTDQ2PS256,
19534 IX86_BUILTIN_CVTPD2PS256,
19535 IX86_BUILTIN_CVTPS2DQ256,
19536 IX86_BUILTIN_CVTPS2PD256,
19537 IX86_BUILTIN_CVTTPD2DQ256,
19538 IX86_BUILTIN_CVTPD2DQ256,
19539 IX86_BUILTIN_CVTTPS2DQ256,
19540 IX86_BUILTIN_EXTRACTF128PD256,
19541 IX86_BUILTIN_EXTRACTF128PS256,
19542 IX86_BUILTIN_EXTRACTF128SI256,
19543 IX86_BUILTIN_VZEROALL,
19544 IX86_BUILTIN_VZEROUPPER,
19545 IX86_BUILTIN_VZEROUPPER_REX64,
19546 IX86_BUILTIN_VPERMILVARPD,
19547 IX86_BUILTIN_VPERMILVARPS,
19548 IX86_BUILTIN_VPERMILVARPD256,
19549 IX86_BUILTIN_VPERMILVARPS256,
19550 IX86_BUILTIN_VPERMILPD,
19551 IX86_BUILTIN_VPERMILPS,
19552 IX86_BUILTIN_VPERMILPD256,
19553 IX86_BUILTIN_VPERMILPS256,
19554 IX86_BUILTIN_VPERMIL2PD,
19555 IX86_BUILTIN_VPERMIL2PS,
19556 IX86_BUILTIN_VPERMIL2PD256,
19557 IX86_BUILTIN_VPERMIL2PS256,
19558 IX86_BUILTIN_VPERM2F128PD256,
19559 IX86_BUILTIN_VPERM2F128PS256,
19560 IX86_BUILTIN_VPERM2F128SI256,
19561 IX86_BUILTIN_VBROADCASTSS,
19562 IX86_BUILTIN_VBROADCASTSD256,
19563 IX86_BUILTIN_VBROADCASTSS256,
19564 IX86_BUILTIN_VBROADCASTPD256,
19565 IX86_BUILTIN_VBROADCASTPS256,
19566 IX86_BUILTIN_VINSERTF128PD256,
19567 IX86_BUILTIN_VINSERTF128PS256,
19568 IX86_BUILTIN_VINSERTF128SI256,
19569 IX86_BUILTIN_LOADUPD256,
19570 IX86_BUILTIN_LOADUPS256,
19571 IX86_BUILTIN_STOREUPD256,
19572 IX86_BUILTIN_STOREUPS256,
19573 IX86_BUILTIN_LDDQU256,
19574 IX86_BUILTIN_LOADDQU256,
19575 IX86_BUILTIN_STOREDQU256,
19576 IX86_BUILTIN_MASKLOADPD,
19577 IX86_BUILTIN_MASKLOADPS,
19578 IX86_BUILTIN_MASKSTOREPD,
19579 IX86_BUILTIN_MASKSTOREPS,
19580 IX86_BUILTIN_MASKLOADPD256,
19581 IX86_BUILTIN_MASKLOADPS256,
19582 IX86_BUILTIN_MASKSTOREPD256,
19583 IX86_BUILTIN_MASKSTOREPS256,
19584 IX86_BUILTIN_MOVSHDUP256,
19585 IX86_BUILTIN_MOVSLDUP256,
19586 IX86_BUILTIN_MOVDDUP256,
19588 IX86_BUILTIN_SQRTPD256,
19589 IX86_BUILTIN_SQRTPS256,
19590 IX86_BUILTIN_SQRTPS_NR256,
19591 IX86_BUILTIN_RSQRTPS256,
19592 IX86_BUILTIN_RSQRTPS_NR256,
19594 IX86_BUILTIN_RCPPS256,
19596 IX86_BUILTIN_ROUNDPD256,
19597 IX86_BUILTIN_ROUNDPS256,
19599 IX86_BUILTIN_UNPCKHPD256,
19600 IX86_BUILTIN_UNPCKLPD256,
19601 IX86_BUILTIN_UNPCKHPS256,
19602 IX86_BUILTIN_UNPCKLPS256,
19604 IX86_BUILTIN_SI256_SI,
19605 IX86_BUILTIN_PS256_PS,
19606 IX86_BUILTIN_PD256_PD,
19607 IX86_BUILTIN_SI_SI256,
19608 IX86_BUILTIN_PS_PS256,
19609 IX86_BUILTIN_PD_PD256,
19611 IX86_BUILTIN_VTESTZPD,
19612 IX86_BUILTIN_VTESTCPD,
19613 IX86_BUILTIN_VTESTNZCPD,
19614 IX86_BUILTIN_VTESTZPS,
19615 IX86_BUILTIN_VTESTCPS,
19616 IX86_BUILTIN_VTESTNZCPS,
19617 IX86_BUILTIN_VTESTZPD256,
19618 IX86_BUILTIN_VTESTCPD256,
19619 IX86_BUILTIN_VTESTNZCPD256,
19620 IX86_BUILTIN_VTESTZPS256,
19621 IX86_BUILTIN_VTESTCPS256,
19622 IX86_BUILTIN_VTESTNZCPS256,
19623 IX86_BUILTIN_PTESTZ256,
19624 IX86_BUILTIN_PTESTC256,
19625 IX86_BUILTIN_PTESTNZC256,
19627 IX86_BUILTIN_MOVMSKPD256,
19628 IX86_BUILTIN_MOVMSKPS256,
19630 /* TFmode support builtins. */
19632 IX86_BUILTIN_FABSQ,
19633 IX86_BUILTIN_COPYSIGNQ,
19635 /* SSE5 instructions */
19636 IX86_BUILTIN_FMADDSS,
19637 IX86_BUILTIN_FMADDSD,
19638 IX86_BUILTIN_FMADDPS,
19639 IX86_BUILTIN_FMADDPD,
19640 IX86_BUILTIN_FMSUBSS,
19641 IX86_BUILTIN_FMSUBSD,
19642 IX86_BUILTIN_FMSUBPS,
19643 IX86_BUILTIN_FMSUBPD,
19644 IX86_BUILTIN_FNMADDSS,
19645 IX86_BUILTIN_FNMADDSD,
19646 IX86_BUILTIN_FNMADDPS,
19647 IX86_BUILTIN_FNMADDPD,
19648 IX86_BUILTIN_FNMSUBSS,
19649 IX86_BUILTIN_FNMSUBSD,
19650 IX86_BUILTIN_FNMSUBPS,
19651 IX86_BUILTIN_FNMSUBPD,
19652 IX86_BUILTIN_PCMOV,
19653 IX86_BUILTIN_PCMOV_V2DI,
19654 IX86_BUILTIN_PCMOV_V4SI,
19655 IX86_BUILTIN_PCMOV_V8HI,
19656 IX86_BUILTIN_PCMOV_V16QI,
19657 IX86_BUILTIN_PCMOV_V4SF,
19658 IX86_BUILTIN_PCMOV_V2DF,
19659 IX86_BUILTIN_PPERM,
19660 IX86_BUILTIN_PERMPS,
19661 IX86_BUILTIN_PERMPD,
19662 IX86_BUILTIN_PMACSSWW,
19663 IX86_BUILTIN_PMACSWW,
19664 IX86_BUILTIN_PMACSSWD,
19665 IX86_BUILTIN_PMACSWD,
19666 IX86_BUILTIN_PMACSSDD,
19667 IX86_BUILTIN_PMACSDD,
19668 IX86_BUILTIN_PMACSSDQL,
19669 IX86_BUILTIN_PMACSSDQH,
19670 IX86_BUILTIN_PMACSDQL,
19671 IX86_BUILTIN_PMACSDQH,
19672 IX86_BUILTIN_PMADCSSWD,
19673 IX86_BUILTIN_PMADCSWD,
19674 IX86_BUILTIN_PHADDBW,
19675 IX86_BUILTIN_PHADDBD,
19676 IX86_BUILTIN_PHADDBQ,
19677 IX86_BUILTIN_PHADDWD,
19678 IX86_BUILTIN_PHADDWQ,
19679 IX86_BUILTIN_PHADDDQ,
19680 IX86_BUILTIN_PHADDUBW,
19681 IX86_BUILTIN_PHADDUBD,
19682 IX86_BUILTIN_PHADDUBQ,
19683 IX86_BUILTIN_PHADDUWD,
19684 IX86_BUILTIN_PHADDUWQ,
19685 IX86_BUILTIN_PHADDUDQ,
19686 IX86_BUILTIN_PHSUBBW,
19687 IX86_BUILTIN_PHSUBWD,
19688 IX86_BUILTIN_PHSUBDQ,
19689 IX86_BUILTIN_PROTB,
19690 IX86_BUILTIN_PROTW,
19691 IX86_BUILTIN_PROTD,
19692 IX86_BUILTIN_PROTQ,
19693 IX86_BUILTIN_PROTB_IMM,
19694 IX86_BUILTIN_PROTW_IMM,
19695 IX86_BUILTIN_PROTD_IMM,
19696 IX86_BUILTIN_PROTQ_IMM,
19697 IX86_BUILTIN_PSHLB,
19698 IX86_BUILTIN_PSHLW,
19699 IX86_BUILTIN_PSHLD,
19700 IX86_BUILTIN_PSHLQ,
19701 IX86_BUILTIN_PSHAB,
19702 IX86_BUILTIN_PSHAW,
19703 IX86_BUILTIN_PSHAD,
19704 IX86_BUILTIN_PSHAQ,
19705 IX86_BUILTIN_FRCZSS,
19706 IX86_BUILTIN_FRCZSD,
19707 IX86_BUILTIN_FRCZPS,
19708 IX86_BUILTIN_FRCZPD,
19709 IX86_BUILTIN_CVTPH2PS,
19710 IX86_BUILTIN_CVTPS2PH,
19712 IX86_BUILTIN_COMEQSS,
19713 IX86_BUILTIN_COMNESS,
19714 IX86_BUILTIN_COMLTSS,
19715 IX86_BUILTIN_COMLESS,
19716 IX86_BUILTIN_COMGTSS,
19717 IX86_BUILTIN_COMGESS,
19718 IX86_BUILTIN_COMUEQSS,
19719 IX86_BUILTIN_COMUNESS,
19720 IX86_BUILTIN_COMULTSS,
19721 IX86_BUILTIN_COMULESS,
19722 IX86_BUILTIN_COMUGTSS,
19723 IX86_BUILTIN_COMUGESS,
19724 IX86_BUILTIN_COMORDSS,
19725 IX86_BUILTIN_COMUNORDSS,
19726 IX86_BUILTIN_COMFALSESS,
19727 IX86_BUILTIN_COMTRUESS,
19729 IX86_BUILTIN_COMEQSD,
19730 IX86_BUILTIN_COMNESD,
19731 IX86_BUILTIN_COMLTSD,
19732 IX86_BUILTIN_COMLESD,
19733 IX86_BUILTIN_COMGTSD,
19734 IX86_BUILTIN_COMGESD,
19735 IX86_BUILTIN_COMUEQSD,
19736 IX86_BUILTIN_COMUNESD,
19737 IX86_BUILTIN_COMULTSD,
19738 IX86_BUILTIN_COMULESD,
19739 IX86_BUILTIN_COMUGTSD,
19740 IX86_BUILTIN_COMUGESD,
19741 IX86_BUILTIN_COMORDSD,
19742 IX86_BUILTIN_COMUNORDSD,
19743 IX86_BUILTIN_COMFALSESD,
19744 IX86_BUILTIN_COMTRUESD,
19746 IX86_BUILTIN_COMEQPS,
19747 IX86_BUILTIN_COMNEPS,
19748 IX86_BUILTIN_COMLTPS,
19749 IX86_BUILTIN_COMLEPS,
19750 IX86_BUILTIN_COMGTPS,
19751 IX86_BUILTIN_COMGEPS,
19752 IX86_BUILTIN_COMUEQPS,
19753 IX86_BUILTIN_COMUNEPS,
19754 IX86_BUILTIN_COMULTPS,
19755 IX86_BUILTIN_COMULEPS,
19756 IX86_BUILTIN_COMUGTPS,
19757 IX86_BUILTIN_COMUGEPS,
19758 IX86_BUILTIN_COMORDPS,
19759 IX86_BUILTIN_COMUNORDPS,
19760 IX86_BUILTIN_COMFALSEPS,
19761 IX86_BUILTIN_COMTRUEPS,
19763 IX86_BUILTIN_COMEQPD,
19764 IX86_BUILTIN_COMNEPD,
19765 IX86_BUILTIN_COMLTPD,
19766 IX86_BUILTIN_COMLEPD,
19767 IX86_BUILTIN_COMGTPD,
19768 IX86_BUILTIN_COMGEPD,
19769 IX86_BUILTIN_COMUEQPD,
19770 IX86_BUILTIN_COMUNEPD,
19771 IX86_BUILTIN_COMULTPD,
19772 IX86_BUILTIN_COMULEPD,
19773 IX86_BUILTIN_COMUGTPD,
19774 IX86_BUILTIN_COMUGEPD,
19775 IX86_BUILTIN_COMORDPD,
19776 IX86_BUILTIN_COMUNORDPD,
19777 IX86_BUILTIN_COMFALSEPD,
19778 IX86_BUILTIN_COMTRUEPD,
19780 IX86_BUILTIN_PCOMEQUB,
19781 IX86_BUILTIN_PCOMNEUB,
19782 IX86_BUILTIN_PCOMLTUB,
19783 IX86_BUILTIN_PCOMLEUB,
19784 IX86_BUILTIN_PCOMGTUB,
19785 IX86_BUILTIN_PCOMGEUB,
19786 IX86_BUILTIN_PCOMFALSEUB,
19787 IX86_BUILTIN_PCOMTRUEUB,
19788 IX86_BUILTIN_PCOMEQUW,
19789 IX86_BUILTIN_PCOMNEUW,
19790 IX86_BUILTIN_PCOMLTUW,
19791 IX86_BUILTIN_PCOMLEUW,
19792 IX86_BUILTIN_PCOMGTUW,
19793 IX86_BUILTIN_PCOMGEUW,
19794 IX86_BUILTIN_PCOMFALSEUW,
19795 IX86_BUILTIN_PCOMTRUEUW,
19796 IX86_BUILTIN_PCOMEQUD,
19797 IX86_BUILTIN_PCOMNEUD,
19798 IX86_BUILTIN_PCOMLTUD,
19799 IX86_BUILTIN_PCOMLEUD,
19800 IX86_BUILTIN_PCOMGTUD,
19801 IX86_BUILTIN_PCOMGEUD,
19802 IX86_BUILTIN_PCOMFALSEUD,
19803 IX86_BUILTIN_PCOMTRUEUD,
19804 IX86_BUILTIN_PCOMEQUQ,
19805 IX86_BUILTIN_PCOMNEUQ,
19806 IX86_BUILTIN_PCOMLTUQ,
19807 IX86_BUILTIN_PCOMLEUQ,
19808 IX86_BUILTIN_PCOMGTUQ,
19809 IX86_BUILTIN_PCOMGEUQ,
19810 IX86_BUILTIN_PCOMFALSEUQ,
19811 IX86_BUILTIN_PCOMTRUEUQ,
19813 IX86_BUILTIN_PCOMEQB,
19814 IX86_BUILTIN_PCOMNEB,
19815 IX86_BUILTIN_PCOMLTB,
19816 IX86_BUILTIN_PCOMLEB,
19817 IX86_BUILTIN_PCOMGTB,
19818 IX86_BUILTIN_PCOMGEB,
19819 IX86_BUILTIN_PCOMFALSEB,
19820 IX86_BUILTIN_PCOMTRUEB,
19821 IX86_BUILTIN_PCOMEQW,
19822 IX86_BUILTIN_PCOMNEW,
19823 IX86_BUILTIN_PCOMLTW,
19824 IX86_BUILTIN_PCOMLEW,
19825 IX86_BUILTIN_PCOMGTW,
19826 IX86_BUILTIN_PCOMGEW,
19827 IX86_BUILTIN_PCOMFALSEW,
19828 IX86_BUILTIN_PCOMTRUEW,
19829 IX86_BUILTIN_PCOMEQD,
19830 IX86_BUILTIN_PCOMNED,
19831 IX86_BUILTIN_PCOMLTD,
19832 IX86_BUILTIN_PCOMLED,
19833 IX86_BUILTIN_PCOMGTD,
19834 IX86_BUILTIN_PCOMGED,
19835 IX86_BUILTIN_PCOMFALSED,
19836 IX86_BUILTIN_PCOMTRUED,
19837 IX86_BUILTIN_PCOMEQQ,
19838 IX86_BUILTIN_PCOMNEQ,
19839 IX86_BUILTIN_PCOMLTQ,
19840 IX86_BUILTIN_PCOMLEQ,
19841 IX86_BUILTIN_PCOMGTQ,
19842 IX86_BUILTIN_PCOMGEQ,
19843 IX86_BUILTIN_PCOMFALSEQ,
19844 IX86_BUILTIN_PCOMTRUEQ,
19849 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; NULL_TREE while a builtin is deferred.  */
19850 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19852 /* Table of all of the builtin functions that are possible with different ISA's
19853 but are waiting to be built until a function is declared to use that
19855 struct builtin_isa GTY(())
19857 tree type; /* builtin type to use in the declaration */
19858 const char *name; /* function name */
19859 int isa; /* isa_flags this builtin is defined for */
19860 bool const_p; /* true if the declaration is constant */
/* Deferred-declaration records, parallel to ix86_builtins above.  */
19863 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19866 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19867 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19868 * function decl in the ix86_builtins array. Returns the function decl or
19869 * NULL_TREE, if the builtin was not added.
19871 * If the front end has a special hook for builtin functions, delay adding
19872 * builtin functions that aren't in the current ISA until the ISA is changed
19873 * with function specific optimization. Doing so, can save about 300K for the
19874 * default compiler. When the builtin is expanded, check at that time whether
19877 * If the front end doesn't have a special hook, record all builtins, even if
19878 * it isn't an instruction set in the current ISA in case the user uses
19879 * function specific options for a different ISA, so that we don't get scope
19880 * errors if a builtin is added in the middle of a function scope. */
19883 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19885 tree decl = NULL_TREE;
/* 64-bit-only builtins are silently dropped on 32-bit targets.  */
19887 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19889 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately if the ISA is already enabled, or if the
   frontend cannot add declarations later (no extended-scope hook).  */
19891 if ((mask & ix86_isa_flags) != 0
19892 || (lang_hooks.builtin_function
19893 == lang_hooks.builtin_function_ext_scope))
19896 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
/* type == NULL_TREE marks the slot as already declared.  */
19898 ix86_builtins[(int) code] = decl;
19899 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise defer: record everything needed to declare the builtin
   later when its ISA is enabled (see ix86_add_new_builtins).  */
19903 ix86_builtins[(int) code] = NULL_TREE;
19904 ix86_builtins_isa[(int) code].const_p = false;
19905 ix86_builtins_isa[(int) code].type = type;
19906 ix86_builtins_isa[(int) code].name = name;
19913 /* Like def_builtin, but also marks the function decl "const". */
19916 def_builtin_const (int mask, const char *name, tree type,
19917 enum ix86_builtins code)
19919 tree decl = def_builtin (mask, name, type, code);
/* If the decl was created immediately, mark it const now; NOTE(review):
   the guard distinguishing the two cases is not visible in this extract.  */
19921 TREE_READONLY (decl) = 1;
/* For a deferred builtin, record const-ness for later declaration.  */
19923 ix86_builtins_isa[(int) code].const_p = true;
19928 /* Add any new builtin functions for a given ISA that may not have been
19929 declared. This saves a bit of space compared to adding all of the
19930 declarations to the tree, even if we didn't use them. */
19933 ix86_add_new_builtins (int isa)
/* Scan every builtin slot and declare those that def_builtin deferred
   and whose ISA bits are now enabled.  */
19938 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL type identifies a still-deferred builtin.  */
19940 if ((ix86_builtins_isa[i].isa & isa) != 0
19941 && ix86_builtins_isa[i].type != NULL_TREE)
19943 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
19944 ix86_builtins_isa[i].type,
19945 i, BUILT_IN_MD, NULL,
/* Mark the slot declared and propagate the recorded const-ness.  */
19948 ix86_builtins[i] = decl;
19949 ix86_builtins_isa[i].type = NULL_TREE;
19950 if (ix86_builtins_isa[i].const_p)
19951 TREE_READONLY (decl) = 1;
19956 /* Bits for builtin_description.flag. */
19958 /* Set when we don't support the comparison natively, and should
19959 swap_comparison in order to support it. */
19960 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table row describing a builtin: the ISA mask that enables it, the
   insn pattern that expands it, its user-visible "__builtin_ia32_*"
   name, its enum ix86_builtins code, and (for comparison builtins) the
   RTX comparison code to use.  */
19962 struct builtin_description
19964 const unsigned int mask;
19965 const enum insn_code icode;
19966 const char *const name;
19967 const enum ix86_builtins code;
19968 const enum rtx_code comparison;
/* Descriptions of the SSE/SSE2 scalar ordered (comi*) and unordered
   (ucomi*) comparison builtins; each row pairs a builtin name with the
   RTX comparison code its expansion tests for.  */
19972 static const struct builtin_description bdesc_comi[] =
19974 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19975 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19976 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19977 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19981 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19987 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20000 static const struct builtin_description bdesc_pcmpestr[] =
/* SSE4.2 packed-compare-explicit-length-string builtins (PCMPESTR*).
   Each record is { ISA option mask, insn code, builtin name, builtin
   enum, comparison code (unused here, hence UNKNOWN), flag }.  For the
   EFLAGS-reading variants (...ia/...ic/...io/...is/...iz) the final
   field carries the CC mode of the flag bit the builtin extracts
   (CF/ZF/SF/OF/"above"); it is 0 for the plain index/mask variants.  */
20003   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20004   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20005   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20006   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20007   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20008   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20009   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20012 static const struct builtin_description bdesc_pcmpistr[] =
/* SSE4.2 packed-compare-implicit-length-string builtins (PCMPISTR*,
   strings terminated by a zero element rather than an explicit length).
   Record layout and the meaning of the final CC-mode/0 field are the
   same as in bdesc_pcmpestr above.  */
20015   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20016   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20017   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20018   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20019   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20020   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20021   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20024 /* Special builtin types */
/* Function signatures for the "special" builtins (those with memory
   operands or void results) in bdesc_special_args below.  Naming is
   RESULT_FTYPE_ARG1[_ARG2...].  The P/PC argument prefixes appear to
   denote pointer / pointer-to-const (e.g. PCFLOAT ~ "const float *"):
   the load entries in bdesc_special_args take PC* sources and the store
   entries take P* destinations -- confirm against the code that builds
   these types.  */
20025 enum ix86_special_builtin_type
20027   SPECIAL_FTYPE_UNKNOWN,
/* Loads: vector result from a memory source.  */
20029   V32QI_FTYPE_PCCHAR,
20030   V16QI_FTYPE_PCCHAR,
20032   V8SF_FTYPE_PCFLOAT,
20034   V4DF_FTYPE_PCDOUBLE,
20035   V4SF_FTYPE_PCFLOAT,
20036   V2DF_FTYPE_PCDOUBLE,
/* Loads combined with a vector operand (masked loads, loadh/loadl).  */
20037   V8SF_FTYPE_PCV8SF_V8SF,
20038   V4DF_FTYPE_PCV4DF_V4DF,
20039   V4SF_FTYPE_V4SF_PCV2SF,
20040   V4SF_FTYPE_PCV4SF_V4SF,
20041   V2DF_FTYPE_V2DF_PCDOUBLE,
20042   V2DF_FTYPE_PCV2DF_V2DF,
/* Stores: void result, destination pointer first.  */
20044   VOID_FTYPE_PV2SF_V4SF,
20045   VOID_FTYPE_PV2DI_V2DI,
20046   VOID_FTYPE_PCHAR_V32QI,
20047   VOID_FTYPE_PCHAR_V16QI,
20048   VOID_FTYPE_PFLOAT_V8SF,
20049   VOID_FTYPE_PFLOAT_V4SF,
20050   VOID_FTYPE_PDOUBLE_V4DF,
20051   VOID_FTYPE_PDOUBLE_V2DF,
20053   VOID_FTYPE_PINT_INT,
/* Masked stores: destination pointer, mask vector, value vector.  */
20054   VOID_FTYPE_PV8SF_V8SF_V8SF,
20055   VOID_FTYPE_PV4DF_V4DF_V4DF,
20056   VOID_FTYPE_PV4SF_V4SF_V4SF,
20057   VOID_FTYPE_PV2DF_V2DF_V2DF
20060 /* Builtin types */
/* Function signatures for the builtins in bdesc_args.  Naming is
   RESULT_FTYPE_ARG1[_ARG2...].  Some names carry a trailing tag that
   selects special expansion handling:
     _PTEST     - ptest-style builtin producing an integer flag result;
     _SWAP      - the two operands are swapped before expansion (used to
                  build cmpgt/cmpge from the lt/le patterns, see e.g.
                  __builtin_ia32_cmpgtps in bdesc_args);
     _COUNT     - the last operand is a shift count (used by the
                  psll/psrl/psra builtins);
     _VEC_MERGE - apparently a scalar operation whose result is merged
                  into the destination vector (used by sqrtss/rsqrtss/
                  rcpss) -- confirm in the expander.  */
20061 enum ix86_builtin_type
20064   FLOAT128_FTYPE_FLOAT128,
20066   FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* PTEST-style integer-flag results.  */
20067   INT_FTYPE_V8SF_V8SF_PTEST,
20068   INT_FTYPE_V4DI_V4DI_PTEST,
20069   INT_FTYPE_V4DF_V4DF_PTEST,
20070   INT_FTYPE_V4SF_V4SF_PTEST,
20071   INT_FTYPE_V2DI_V2DI_PTEST,
20072   INT_FTYPE_V2DF_V2DF_PTEST,
20104   V4SF_FTYPE_V4SF_VEC_MERGE,
20113   V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures (including _COUNT shift forms).  */
20124   V16QI_FTYPE_V16QI_V16QI,
20125   V16QI_FTYPE_V8HI_V8HI,
20126   V8QI_FTYPE_V8QI_V8QI,
20127   V8QI_FTYPE_V4HI_V4HI,
20128   V8HI_FTYPE_V8HI_V8HI,
20129   V8HI_FTYPE_V8HI_V8HI_COUNT,
20130   V8HI_FTYPE_V16QI_V16QI,
20131   V8HI_FTYPE_V4SI_V4SI,
20132   V8HI_FTYPE_V8HI_SI_COUNT,
20133   V8SF_FTYPE_V8SF_V8SF,
20134   V8SF_FTYPE_V8SF_V8SI,
20135   V4SI_FTYPE_V4SI_V4SI,
20136   V4SI_FTYPE_V4SI_V4SI_COUNT,
20137   V4SI_FTYPE_V8HI_V8HI,
20138   V4SI_FTYPE_V4SF_V4SF,
20139   V4SI_FTYPE_V2DF_V2DF,
20140   V4SI_FTYPE_V4SI_SI_COUNT,
20141   V4HI_FTYPE_V4HI_V4HI,
20142   V4HI_FTYPE_V4HI_V4HI_COUNT,
20143   V4HI_FTYPE_V8QI_V8QI,
20144   V4HI_FTYPE_V2SI_V2SI,
20145   V4HI_FTYPE_V4HI_SI_COUNT,
20146   V4DF_FTYPE_V4DF_V4DF,
20147   V4DF_FTYPE_V4DF_V4DI,
20148   V4SF_FTYPE_V4SF_V4SF,
20149   V4SF_FTYPE_V4SF_V4SF_SWAP,
20150   V4SF_FTYPE_V4SF_V4SI,
20151   V4SF_FTYPE_V4SF_V2SI,
20152   V4SF_FTYPE_V4SF_V2DF,
20153   V4SF_FTYPE_V4SF_DI,
20154   V4SF_FTYPE_V4SF_SI,
20155   V2DI_FTYPE_V2DI_V2DI,
20156   V2DI_FTYPE_V2DI_V2DI_COUNT,
20157   V2DI_FTYPE_V16QI_V16QI,
20158   V2DI_FTYPE_V4SI_V4SI,
20159   V2DI_FTYPE_V2DI_V16QI,
20160   V2DI_FTYPE_V2DF_V2DF,
20161   V2DI_FTYPE_V2DI_SI_COUNT,
20162   V2SI_FTYPE_V2SI_V2SI,
20163   V2SI_FTYPE_V2SI_V2SI_COUNT,
20164   V2SI_FTYPE_V4HI_V4HI,
20165   V2SI_FTYPE_V2SF_V2SF,
20166   V2SI_FTYPE_V2SI_SI_COUNT,
20167   V2DF_FTYPE_V2DF_V2DF,
20168   V2DF_FTYPE_V2DF_V2DF_SWAP,
20169   V2DF_FTYPE_V2DF_V4SF,
20170   V2DF_FTYPE_V2DF_V2DI,
20171   V2DF_FTYPE_V2DF_DI,
20172   V2DF_FTYPE_V2DF_SI,
20173   V2SF_FTYPE_V2SF_V2SF,
20174   V1DI_FTYPE_V1DI_V1DI,
20175   V1DI_FTYPE_V1DI_V1DI_COUNT,
20176   V1DI_FTYPE_V8QI_V8QI,
20177   V1DI_FTYPE_V2SI_V2SI,
20178   V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures (e.g. crc32-style builtins).  */
20179   UINT64_FTYPE_UINT64_UINT64,
20180   UINT_FTYPE_UINT_UINT,
20181   UINT_FTYPE_UINT_USHORT,
20182   UINT_FTYPE_UINT_UCHAR,
/* Vector plus integer immediate.  */
20183   V8HI_FTYPE_V8HI_INT,
20184   V4SI_FTYPE_V4SI_INT,
20185   V4HI_FTYPE_V4HI_INT,
20186   V8SF_FTYPE_V8SF_INT,
20187   V4SI_FTYPE_V8SI_INT,
20188   V4SF_FTYPE_V8SF_INT,
20189   V2DF_FTYPE_V4DF_INT,
20190   V4DF_FTYPE_V4DF_INT,
20191   V4SF_FTYPE_V4SF_INT,
20192   V2DI_FTYPE_V2DI_INT,
20193   V2DI2TI_FTYPE_V2DI_INT,
20194   V2DF_FTYPE_V2DF_INT,
/* Three-operand vector signatures.  */
20195   V16QI_FTYPE_V16QI_V16QI_V16QI,
20196   V8SF_FTYPE_V8SF_V8SF_V8SF,
20197   V4DF_FTYPE_V4DF_V4DF_V4DF,
20198   V4SF_FTYPE_V4SF_V4SF_V4SF,
20199   V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus integer immediate (shuffles, palignr, blends, ...).  */
20200   V16QI_FTYPE_V16QI_V16QI_INT,
20201   V8SI_FTYPE_V8SI_V8SI_INT,
20202   V8SI_FTYPE_V8SI_V4SI_INT,
20203   V8HI_FTYPE_V8HI_V8HI_INT,
20204   V8SF_FTYPE_V8SF_V8SF_INT,
20205   V8SF_FTYPE_V8SF_V4SF_INT,
20206   V4SI_FTYPE_V4SI_V4SI_INT,
20207   V4DF_FTYPE_V4DF_V4DF_INT,
20208   V4DF_FTYPE_V4DF_V2DF_INT,
20209   V4SF_FTYPE_V4SF_V4SF_INT,
20210   V2DI_FTYPE_V2DI_V2DI_INT,
20211   V2DI2TI_FTYPE_V2DI_V2DI_INT,
20212   V1DI2DI_FTYPE_V1DI_V1DI_INT,
20213   V2DF_FTYPE_V2DF_V2DF_INT,
/* Three vectors plus integer immediate.  */
20214   V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
20215   V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
20216   V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
20217   V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
/* Vector plus two unsigned immediates (SSE4A extrq/insertq forms).  */
20218   V2DI_FTYPE_V2DI_UINT_UINT,
20219   V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20222 /* Special builtins with variable number of arguments. */
/* Builtins with memory operands or void results (loads, stores, fences,
   masked load/store, vzero*).  Each record is { ISA option mask, insn
   code, builtin name (0 when there is no user-visible name), builtin
   enum, comparison code (unused here), (int) ix86_special_builtin_type }.  */
20223 static const struct builtin_description bdesc_special_args[] =
/* MMX */
20226   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
20229   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
20232   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20233   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20234   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20236   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20237   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20238   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20239   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20241 /* SSE or 3DNow!A */
20242   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20243   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
20246   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20247   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20248   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20249   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20250   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20251   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20252   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20253   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20254   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20256   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20257   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
20260   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
20263   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
20266   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20267   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
20270   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20271   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20272   { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20274   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20275   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20276   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20277   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20278   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20280   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20281   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20282   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20283   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20284   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20285   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20286   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
/* AVX masked load/store: pointer, mask vector, value vector.  */
20288   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20289   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20290   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20291   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20292   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20293   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20294   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20295   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20298 /* Builtins with variable number of arguments. */
20299 static const struct builtin_description bdesc_args[] =
20302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20383 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20384 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20385 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20388 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20389 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20390 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20391 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20392 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20393 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20398 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20400 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20404 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20407 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20411 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20412 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20413 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20443 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20444 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20448 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20450 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20451 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20461 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
20463 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20469 /* SSE MMX or 3Dnow!A */
20470 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20471 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20475 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20476 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20479 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20480 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20482 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20503 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20504 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20510 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20511 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20512 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20513 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20541 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20545 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20547 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20548 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20554 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20556 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20557 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20558 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20559 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20560 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20561 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20562 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20563 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20574 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20575 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20577 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20579 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20580 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20592 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20593 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20594 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20610 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
20613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
20614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
20618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
20619 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
20620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
20621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
20623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20624 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20625 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20626 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20627 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20628 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20629 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20632 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20633 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20634 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20635 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20636 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20637 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20639 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20640 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20641 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20642 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
20645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
20650 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
20651 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
20653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20656 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20657 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20660 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
20661 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20663 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20664 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20665 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20666 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20667 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20668 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20671 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
20672 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
20673 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
20675 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
20676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20682 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
20691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
20692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
20705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
20708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
20711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
20712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
20716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
20717 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
20719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20746 /* SSE4.1 and SSE5 */
20747 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20748 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20749 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20750 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20752 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20753 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20754 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20757 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20758 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
20759 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
20760 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
20761 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
20764 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
20765 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
20766 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
20767 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20770 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
20771 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20773 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20774 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20775 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20776 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20779 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
20782 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20783 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20786 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20787 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20790 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20796 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20797 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20798 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20799 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20800 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20801 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20802 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20803 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20804 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20805 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20806 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20807 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
20810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
20811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
20812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
20814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
20817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
20818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
20828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
20829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
20830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
20831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
20832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
20833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
20835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
20841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
20846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
20847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
20848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
20849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
20850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
20851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
20853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20857 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20859 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20861 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
20874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
20875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
20876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
20877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
20878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
20880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
20897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
20901 enum multi_arg_type {
20911 MULTI_ARG_3_PERMPS,
20912 MULTI_ARG_3_PERMPD,
20919 MULTI_ARG_2_DI_IMM,
20920 MULTI_ARG_2_SI_IMM,
20921 MULTI_ARG_2_HI_IMM,
20922 MULTI_ARG_2_QI_IMM,
20923 MULTI_ARG_2_SF_CMP,
20924 MULTI_ARG_2_DF_CMP,
20925 MULTI_ARG_2_DI_CMP,
20926 MULTI_ARG_2_SI_CMP,
20927 MULTI_ARG_2_HI_CMP,
20928 MULTI_ARG_2_QI_CMP,
20951 static const struct builtin_description bdesc_multi_arg[] =
20953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
20970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
20974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
20995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
20996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
20997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
20998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
20999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21189 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21190 in the current target ISA to allow the user to compile particular modules
21191 with different target specific options that differ from the command line
21194 ix86_init_mmx_sse_builtins (void)
21196 const struct builtin_description * d;
21199 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21200 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21201 tree V1DI_type_node
21202 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21203 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21204 tree V2DI_type_node
21205 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21206 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21207 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21208 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21209 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21210 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21211 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21213 tree pchar_type_node = build_pointer_type (char_type_node);
21214 tree pcchar_type_node
21215 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21216 tree pfloat_type_node = build_pointer_type (float_type_node);
21217 tree pcfloat_type_node
21218 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21219 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21220 tree pcv2sf_type_node
21221 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21222 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21223 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21226 tree int_ftype_v4sf_v4sf
21227 = build_function_type_list (integer_type_node,
21228 V4SF_type_node, V4SF_type_node, NULL_TREE);
21229 tree v4si_ftype_v4sf_v4sf
21230 = build_function_type_list (V4SI_type_node,
21231 V4SF_type_node, V4SF_type_node, NULL_TREE);
21232 /* MMX/SSE/integer conversions. */
21233 tree int_ftype_v4sf
21234 = build_function_type_list (integer_type_node,
21235 V4SF_type_node, NULL_TREE);
21236 tree int64_ftype_v4sf
21237 = build_function_type_list (long_long_integer_type_node,
21238 V4SF_type_node, NULL_TREE);
21239 tree int_ftype_v8qi
21240 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21241 tree v4sf_ftype_v4sf_int
21242 = build_function_type_list (V4SF_type_node,
21243 V4SF_type_node, integer_type_node, NULL_TREE);
21244 tree v4sf_ftype_v4sf_int64
21245 = build_function_type_list (V4SF_type_node,
21246 V4SF_type_node, long_long_integer_type_node,
21248 tree v4sf_ftype_v4sf_v2si
21249 = build_function_type_list (V4SF_type_node,
21250 V4SF_type_node, V2SI_type_node, NULL_TREE);
21252 /* Miscellaneous. */
21253 tree v8qi_ftype_v4hi_v4hi
21254 = build_function_type_list (V8QI_type_node,
21255 V4HI_type_node, V4HI_type_node, NULL_TREE);
21256 tree v4hi_ftype_v2si_v2si
21257 = build_function_type_list (V4HI_type_node,
21258 V2SI_type_node, V2SI_type_node, NULL_TREE);
21259 tree v4sf_ftype_v4sf_v4sf_int
21260 = build_function_type_list (V4SF_type_node,
21261 V4SF_type_node, V4SF_type_node,
21262 integer_type_node, NULL_TREE);
21263 tree v2si_ftype_v4hi_v4hi
21264 = build_function_type_list (V2SI_type_node,
21265 V4HI_type_node, V4HI_type_node, NULL_TREE);
21266 tree v4hi_ftype_v4hi_int
21267 = build_function_type_list (V4HI_type_node,
21268 V4HI_type_node, integer_type_node, NULL_TREE);
21269 tree v2si_ftype_v2si_int
21270 = build_function_type_list (V2SI_type_node,
21271 V2SI_type_node, integer_type_node, NULL_TREE);
21272 tree v1di_ftype_v1di_int
21273 = build_function_type_list (V1DI_type_node,
21274 V1DI_type_node, integer_type_node, NULL_TREE);
21276 tree void_ftype_void
21277 = build_function_type (void_type_node, void_list_node);
21278 tree void_ftype_unsigned
21279 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21280 tree void_ftype_unsigned_unsigned
21281 = build_function_type_list (void_type_node, unsigned_type_node,
21282 unsigned_type_node, NULL_TREE);
21283 tree void_ftype_pcvoid_unsigned_unsigned
21284 = build_function_type_list (void_type_node, const_ptr_type_node,
21285 unsigned_type_node, unsigned_type_node,
21287 tree unsigned_ftype_void
21288 = build_function_type (unsigned_type_node, void_list_node);
21289 tree v2si_ftype_v4sf
21290 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21291 /* Loads/stores. */
21292 tree void_ftype_v8qi_v8qi_pchar
21293 = build_function_type_list (void_type_node,
21294 V8QI_type_node, V8QI_type_node,
21295 pchar_type_node, NULL_TREE);
21296 tree v4sf_ftype_pcfloat
21297 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21298 tree v4sf_ftype_v4sf_pcv2sf
21299 = build_function_type_list (V4SF_type_node,
21300 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21301 tree void_ftype_pv2sf_v4sf
21302 = build_function_type_list (void_type_node,
21303 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21304 tree void_ftype_pfloat_v4sf
21305 = build_function_type_list (void_type_node,
21306 pfloat_type_node, V4SF_type_node, NULL_TREE);
21307 tree void_ftype_pdi_di
21308 = build_function_type_list (void_type_node,
21309 pdi_type_node, long_long_unsigned_type_node,
21311 tree void_ftype_pv2di_v2di
21312 = build_function_type_list (void_type_node,
21313 pv2di_type_node, V2DI_type_node, NULL_TREE);
21314 /* Normal vector unops. */
21315 tree v4sf_ftype_v4sf
21316 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21317 tree v16qi_ftype_v16qi
21318 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21319 tree v8hi_ftype_v8hi
21320 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21321 tree v4si_ftype_v4si
21322 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21323 tree v8qi_ftype_v8qi
21324 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21325 tree v4hi_ftype_v4hi
21326 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21328 /* Normal vector binops. */
21329 tree v4sf_ftype_v4sf_v4sf
21330 = build_function_type_list (V4SF_type_node,
21331 V4SF_type_node, V4SF_type_node, NULL_TREE);
21332 tree v8qi_ftype_v8qi_v8qi
21333 = build_function_type_list (V8QI_type_node,
21334 V8QI_type_node, V8QI_type_node, NULL_TREE);
21335 tree v4hi_ftype_v4hi_v4hi
21336 = build_function_type_list (V4HI_type_node,
21337 V4HI_type_node, V4HI_type_node, NULL_TREE);
21338 tree v2si_ftype_v2si_v2si
21339 = build_function_type_list (V2SI_type_node,
21340 V2SI_type_node, V2SI_type_node, NULL_TREE);
21341 tree v1di_ftype_v1di_v1di
21342 = build_function_type_list (V1DI_type_node,
21343 V1DI_type_node, V1DI_type_node, NULL_TREE);
21344 tree v1di_ftype_v1di_v1di_int
21345 = build_function_type_list (V1DI_type_node,
21346 V1DI_type_node, V1DI_type_node,
21347 integer_type_node, NULL_TREE);
21348 tree v2si_ftype_v2sf
21349 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21350 tree v2sf_ftype_v2si
21351 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21352 tree v2si_ftype_v2si
21353 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21354 tree v2sf_ftype_v2sf
21355 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21356 tree v2sf_ftype_v2sf_v2sf
21357 = build_function_type_list (V2SF_type_node,
21358 V2SF_type_node, V2SF_type_node, NULL_TREE);
21359 tree v2si_ftype_v2sf_v2sf
21360 = build_function_type_list (V2SI_type_node,
21361 V2SF_type_node, V2SF_type_node, NULL_TREE);
21362 tree pint_type_node = build_pointer_type (integer_type_node);
21363 tree pdouble_type_node = build_pointer_type (double_type_node);
21364 tree pcdouble_type_node = build_pointer_type (
21365 build_type_variant (double_type_node, 1, 0));
21366 tree int_ftype_v2df_v2df
21367 = build_function_type_list (integer_type_node,
21368 V2DF_type_node, V2DF_type_node, NULL_TREE);
21370 tree void_ftype_pcvoid
21371 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21372 tree v4sf_ftype_v4si
21373 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21374 tree v4si_ftype_v4sf
21375 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21376 tree v2df_ftype_v4si
21377 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21378 tree v4si_ftype_v2df
21379 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21380 tree v4si_ftype_v2df_v2df
21381 = build_function_type_list (V4SI_type_node,
21382 V2DF_type_node, V2DF_type_node, NULL_TREE);
21383 tree v2si_ftype_v2df
21384 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21385 tree v4sf_ftype_v2df
21386 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21387 tree v2df_ftype_v2si
21388 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21389 tree v2df_ftype_v4sf
21390 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21391 tree int_ftype_v2df
21392 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21393 tree int64_ftype_v2df
21394 = build_function_type_list (long_long_integer_type_node,
21395 V2DF_type_node, NULL_TREE);
21396 tree v2df_ftype_v2df_int
21397 = build_function_type_list (V2DF_type_node,
21398 V2DF_type_node, integer_type_node, NULL_TREE);
21399 tree v2df_ftype_v2df_int64
21400 = build_function_type_list (V2DF_type_node,
21401 V2DF_type_node, long_long_integer_type_node,
21403 tree v4sf_ftype_v4sf_v2df
21404 = build_function_type_list (V4SF_type_node,
21405 V4SF_type_node, V2DF_type_node, NULL_TREE);
21406 tree v2df_ftype_v2df_v4sf
21407 = build_function_type_list (V2DF_type_node,
21408 V2DF_type_node, V4SF_type_node, NULL_TREE);
21409 tree v2df_ftype_v2df_v2df_int
21410 = build_function_type_list (V2DF_type_node,
21411 V2DF_type_node, V2DF_type_node,
21414 tree v2df_ftype_v2df_pcdouble
21415 = build_function_type_list (V2DF_type_node,
21416 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21417 tree void_ftype_pdouble_v2df
21418 = build_function_type_list (void_type_node,
21419 pdouble_type_node, V2DF_type_node, NULL_TREE);
21420 tree void_ftype_pint_int
21421 = build_function_type_list (void_type_node,
21422 pint_type_node, integer_type_node, NULL_TREE);
21423 tree void_ftype_v16qi_v16qi_pchar
21424 = build_function_type_list (void_type_node,
21425 V16QI_type_node, V16QI_type_node,
21426 pchar_type_node, NULL_TREE);
21427 tree v2df_ftype_pcdouble
21428 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21429 tree v2df_ftype_v2df_v2df
21430 = build_function_type_list (V2DF_type_node,
21431 V2DF_type_node, V2DF_type_node, NULL_TREE);
21432 tree v16qi_ftype_v16qi_v16qi
21433 = build_function_type_list (V16QI_type_node,
21434 V16QI_type_node, V16QI_type_node, NULL_TREE);
21435 tree v8hi_ftype_v8hi_v8hi
21436 = build_function_type_list (V8HI_type_node,
21437 V8HI_type_node, V8HI_type_node, NULL_TREE);
21438 tree v4si_ftype_v4si_v4si
21439 = build_function_type_list (V4SI_type_node,
21440 V4SI_type_node, V4SI_type_node, NULL_TREE);
21441 tree v2di_ftype_v2di_v2di
21442 = build_function_type_list (V2DI_type_node,
21443 V2DI_type_node, V2DI_type_node, NULL_TREE);
21444 tree v2di_ftype_v2df_v2df
21445 = build_function_type_list (V2DI_type_node,
21446 V2DF_type_node, V2DF_type_node, NULL_TREE);
21447 tree v2df_ftype_v2df
21448 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21449 tree v2di_ftype_v2di_int
21450 = build_function_type_list (V2DI_type_node,
21451 V2DI_type_node, integer_type_node, NULL_TREE);
21452 tree v2di_ftype_v2di_v2di_int
21453 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21454 V2DI_type_node, integer_type_node, NULL_TREE);
21455 tree v4si_ftype_v4si_int
21456 = build_function_type_list (V4SI_type_node,
21457 V4SI_type_node, integer_type_node, NULL_TREE);
21458 tree v8hi_ftype_v8hi_int
21459 = build_function_type_list (V8HI_type_node,
21460 V8HI_type_node, integer_type_node, NULL_TREE);
21461 tree v4si_ftype_v8hi_v8hi
21462 = build_function_type_list (V4SI_type_node,
21463 V8HI_type_node, V8HI_type_node, NULL_TREE);
21464 tree v1di_ftype_v8qi_v8qi
21465 = build_function_type_list (V1DI_type_node,
21466 V8QI_type_node, V8QI_type_node, NULL_TREE);
21467 tree v1di_ftype_v2si_v2si
21468 = build_function_type_list (V1DI_type_node,
21469 V2SI_type_node, V2SI_type_node, NULL_TREE);
21470 tree v2di_ftype_v16qi_v16qi
21471 = build_function_type_list (V2DI_type_node,
21472 V16QI_type_node, V16QI_type_node, NULL_TREE);
21473 tree v2di_ftype_v4si_v4si
21474 = build_function_type_list (V2DI_type_node,
21475 V4SI_type_node, V4SI_type_node, NULL_TREE);
21476 tree int_ftype_v16qi
21477 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21478 tree v16qi_ftype_pcchar
21479 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21480 tree void_ftype_pchar_v16qi
21481 = build_function_type_list (void_type_node,
21482 pchar_type_node, V16QI_type_node, NULL_TREE);
21484 tree v2di_ftype_v2di_unsigned_unsigned
21485 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21486 unsigned_type_node, unsigned_type_node,
21488 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21489 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21490 unsigned_type_node, unsigned_type_node,
21492 tree v2di_ftype_v2di_v16qi
21493 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21495 tree v2df_ftype_v2df_v2df_v2df
21496 = build_function_type_list (V2DF_type_node,
21497 V2DF_type_node, V2DF_type_node,
21498 V2DF_type_node, NULL_TREE);
21499 tree v4sf_ftype_v4sf_v4sf_v4sf
21500 = build_function_type_list (V4SF_type_node,
21501 V4SF_type_node, V4SF_type_node,
21502 V4SF_type_node, NULL_TREE);
21503 tree v8hi_ftype_v16qi
21504 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21506 tree v4si_ftype_v16qi
21507 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21509 tree v2di_ftype_v16qi
21510 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21512 tree v4si_ftype_v8hi
21513 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21515 tree v2di_ftype_v8hi
21516 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21518 tree v2di_ftype_v4si
21519 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21521 tree v2di_ftype_pv2di
21522 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21524 tree v16qi_ftype_v16qi_v16qi_int
21525 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21526 V16QI_type_node, integer_type_node,
21528 tree v16qi_ftype_v16qi_v16qi_v16qi
21529 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21530 V16QI_type_node, V16QI_type_node,
21532 tree v8hi_ftype_v8hi_v8hi_int
21533 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21534 V8HI_type_node, integer_type_node,
21536 tree v4si_ftype_v4si_v4si_int
21537 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21538 V4SI_type_node, integer_type_node,
21540 tree int_ftype_v2di_v2di
21541 = build_function_type_list (integer_type_node,
21542 V2DI_type_node, V2DI_type_node,
21544 tree int_ftype_v16qi_int_v16qi_int_int
21545 = build_function_type_list (integer_type_node,
21552 tree v16qi_ftype_v16qi_int_v16qi_int_int
21553 = build_function_type_list (V16QI_type_node,
21560 tree int_ftype_v16qi_v16qi_int
21561 = build_function_type_list (integer_type_node,
21567 /* SSE5 instructions */
21568 tree v2di_ftype_v2di_v2di_v2di
21569 = build_function_type_list (V2DI_type_node,
21575 tree v4si_ftype_v4si_v4si_v4si
21576 = build_function_type_list (V4SI_type_node,
21582 tree v4si_ftype_v4si_v4si_v2di
21583 = build_function_type_list (V4SI_type_node,
21589 tree v8hi_ftype_v8hi_v8hi_v8hi
21590 = build_function_type_list (V8HI_type_node,
21596 tree v8hi_ftype_v8hi_v8hi_v4si
21597 = build_function_type_list (V8HI_type_node,
21603 tree v2df_ftype_v2df_v2df_v16qi
21604 = build_function_type_list (V2DF_type_node,
21610 tree v4sf_ftype_v4sf_v4sf_v16qi
21611 = build_function_type_list (V4SF_type_node,
21617 tree v2di_ftype_v2di_si
21618 = build_function_type_list (V2DI_type_node,
21623 tree v4si_ftype_v4si_si
21624 = build_function_type_list (V4SI_type_node,
21629 tree v8hi_ftype_v8hi_si
21630 = build_function_type_list (V8HI_type_node,
21635 tree v16qi_ftype_v16qi_si
21636 = build_function_type_list (V16QI_type_node,
21640 tree v4sf_ftype_v4hi
21641 = build_function_type_list (V4SF_type_node,
21645 tree v4hi_ftype_v4sf
21646 = build_function_type_list (V4HI_type_node,
21650 tree v2di_ftype_v2di
21651 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
21653 tree v16qi_ftype_v8hi_v8hi
21654 = build_function_type_list (V16QI_type_node,
21655 V8HI_type_node, V8HI_type_node,
21657 tree v8hi_ftype_v4si_v4si
21658 = build_function_type_list (V8HI_type_node,
21659 V4SI_type_node, V4SI_type_node,
21661 tree v8hi_ftype_v16qi_v16qi
21662 = build_function_type_list (V8HI_type_node,
21663 V16QI_type_node, V16QI_type_node,
21665 tree v4hi_ftype_v8qi_v8qi
21666 = build_function_type_list (V4HI_type_node,
21667 V8QI_type_node, V8QI_type_node,
21669 tree unsigned_ftype_unsigned_uchar
21670 = build_function_type_list (unsigned_type_node,
21671 unsigned_type_node,
21672 unsigned_char_type_node,
21674 tree unsigned_ftype_unsigned_ushort
21675 = build_function_type_list (unsigned_type_node,
21676 unsigned_type_node,
21677 short_unsigned_type_node,
21679 tree unsigned_ftype_unsigned_unsigned
21680 = build_function_type_list (unsigned_type_node,
21681 unsigned_type_node,
21682 unsigned_type_node,
21684 tree uint64_ftype_uint64_uint64
21685 = build_function_type_list (long_long_unsigned_type_node,
21686 long_long_unsigned_type_node,
21687 long_long_unsigned_type_node,
21689 tree float_ftype_float
21690 = build_function_type_list (float_type_node,
21695 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
21697 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
21699 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
21701 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
21703 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
21705 tree v8sf_ftype_v8sf
21706 = build_function_type_list (V8SF_type_node,
21709 tree v8si_ftype_v8sf
21710 = build_function_type_list (V8SI_type_node,
21713 tree v8sf_ftype_v8si
21714 = build_function_type_list (V8SF_type_node,
21717 tree v4si_ftype_v4df
21718 = build_function_type_list (V4SI_type_node,
21721 tree v4df_ftype_v4df
21722 = build_function_type_list (V4DF_type_node,
21725 tree v4df_ftype_v4si
21726 = build_function_type_list (V4DF_type_node,
21729 tree v4df_ftype_v4sf
21730 = build_function_type_list (V4DF_type_node,
21733 tree v4sf_ftype_v4df
21734 = build_function_type_list (V4SF_type_node,
21737 tree v8sf_ftype_v8sf_v8sf
21738 = build_function_type_list (V8SF_type_node,
21739 V8SF_type_node, V8SF_type_node,
21741 tree v4df_ftype_v4df_v4df
21742 = build_function_type_list (V4DF_type_node,
21743 V4DF_type_node, V4DF_type_node,
21745 tree v8sf_ftype_v8sf_int
21746 = build_function_type_list (V8SF_type_node,
21747 V8SF_type_node, integer_type_node,
21749 tree v4si_ftype_v8si_int
21750 = build_function_type_list (V4SI_type_node,
21751 V8SI_type_node, integer_type_node,
21753 tree v4df_ftype_v4df_int
21754 = build_function_type_list (V4DF_type_node,
21755 V4DF_type_node, integer_type_node,
21757 tree v4sf_ftype_v8sf_int
21758 = build_function_type_list (V4SF_type_node,
21759 V8SF_type_node, integer_type_node,
21761 tree v2df_ftype_v4df_int
21762 = build_function_type_list (V2DF_type_node,
21763 V4DF_type_node, integer_type_node,
21765 tree v8sf_ftype_v8sf_v8sf_int
21766 = build_function_type_list (V8SF_type_node,
21767 V8SF_type_node, V8SF_type_node,
21770 tree v8sf_ftype_v8sf_v8sf_v8sf
21771 = build_function_type_list (V8SF_type_node,
21772 V8SF_type_node, V8SF_type_node,
21775 tree v4df_ftype_v4df_v4df_v4df
21776 = build_function_type_list (V4DF_type_node,
21777 V4DF_type_node, V4DF_type_node,
21780 tree v8si_ftype_v8si_v8si_int
21781 = build_function_type_list (V8SI_type_node,
21782 V8SI_type_node, V8SI_type_node,
21785 tree v4df_ftype_v4df_v4df_int
21786 = build_function_type_list (V4DF_type_node,
21787 V4DF_type_node, V4DF_type_node,
21790 tree v8sf_ftype_v8sf_v8sf_v8si_int
21791 = build_function_type_list (V8SF_type_node,
21792 V8SF_type_node, V8SF_type_node,
21793 V8SI_type_node, integer_type_node,
21795 tree v4df_ftype_v4df_v4df_v4di_int
21796 = build_function_type_list (V4DF_type_node,
21797 V4DF_type_node, V4DF_type_node,
21798 V4DI_type_node, integer_type_node,
21800 tree v4sf_ftype_v4sf_v4sf_v4si_int
21801 = build_function_type_list (V4SF_type_node,
21802 V4SF_type_node, V4SF_type_node,
21803 V4SI_type_node, integer_type_node,
21805 tree v2df_ftype_v2df_v2df_v2di_int
21806 = build_function_type_list (V2DF_type_node,
21807 V2DF_type_node, V2DF_type_node,
21808 V2DI_type_node, integer_type_node,
21810 tree v8sf_ftype_pcfloat
21811 = build_function_type_list (V8SF_type_node,
21814 tree v4df_ftype_pcdouble
21815 = build_function_type_list (V4DF_type_node,
21816 pcdouble_type_node,
21818 tree pcv4sf_type_node
21819 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
21820 tree pcv2df_type_node
21821 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
21822 tree v8sf_ftype_pcv4sf
21823 = build_function_type_list (V8SF_type_node,
21826 tree v4df_ftype_pcv2df
21827 = build_function_type_list (V4DF_type_node,
21830 tree v32qi_ftype_pcchar
21831 = build_function_type_list (V32QI_type_node,
21834 tree void_ftype_pchar_v32qi
21835 = build_function_type_list (void_type_node,
21836 pchar_type_node, V32QI_type_node,
21838 tree v8si_ftype_v8si_v4si_int
21839 = build_function_type_list (V8SI_type_node,
21840 V8SI_type_node, V4SI_type_node,
21843 tree v8sf_ftype_v8sf_v4sf_int
21844 = build_function_type_list (V8SF_type_node,
21845 V8SF_type_node, V4SF_type_node,
21848 tree v4df_ftype_v4df_v2df_int
21849 = build_function_type_list (V4DF_type_node,
21850 V4DF_type_node, V2DF_type_node,
21853 tree void_ftype_pfloat_v8sf
21854 = build_function_type_list (void_type_node,
21855 pfloat_type_node, V8SF_type_node,
21857 tree void_ftype_pdouble_v4df
21858 = build_function_type_list (void_type_node,
21859 pdouble_type_node, V4DF_type_node,
21861 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
21862 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
21863 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
21864 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
21865 tree pcv8sf_type_node
21866 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
21867 tree pcv4df_type_node
21868 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
21869 tree v8sf_ftype_pcv8sf_v8sf
21870 = build_function_type_list (V8SF_type_node,
21871 pcv8sf_type_node, V8SF_type_node,
21873 tree v4df_ftype_pcv4df_v4df
21874 = build_function_type_list (V4DF_type_node,
21875 pcv4df_type_node, V4DF_type_node,
21877 tree v4sf_ftype_pcv4sf_v4sf
21878 = build_function_type_list (V4SF_type_node,
21879 pcv4sf_type_node, V4SF_type_node,
21881 tree v2df_ftype_pcv2df_v2df
21882 = build_function_type_list (V2DF_type_node,
21883 pcv2df_type_node, V2DF_type_node,
21885 tree void_ftype_pv8sf_v8sf_v8sf
21886 = build_function_type_list (void_type_node,
21887 pv8sf_type_node, V8SF_type_node,
21890 tree void_ftype_pv4df_v4df_v4df
21891 = build_function_type_list (void_type_node,
21892 pv4df_type_node, V4DF_type_node,
21895 tree void_ftype_pv4sf_v4sf_v4sf
21896 = build_function_type_list (void_type_node,
21897 pv4sf_type_node, V4SF_type_node,
21900 tree void_ftype_pv2df_v2df_v2df
21901 = build_function_type_list (void_type_node,
21902 pv2df_type_node, V2DF_type_node,
21905 tree v4df_ftype_v2df
21906 = build_function_type_list (V4DF_type_node,
21909 tree v8sf_ftype_v4sf
21910 = build_function_type_list (V8SF_type_node,
21913 tree v8si_ftype_v4si
21914 = build_function_type_list (V8SI_type_node,
21917 tree v2df_ftype_v4df
21918 = build_function_type_list (V2DF_type_node,
21921 tree v4sf_ftype_v8sf
21922 = build_function_type_list (V4SF_type_node,
21925 tree v4si_ftype_v8si
21926 = build_function_type_list (V4SI_type_node,
21929 tree int_ftype_v4df
21930 = build_function_type_list (integer_type_node,
21933 tree int_ftype_v8sf
21934 = build_function_type_list (integer_type_node,
21937 tree int_ftype_v8sf_v8sf
21938 = build_function_type_list (integer_type_node,
21939 V8SF_type_node, V8SF_type_node,
21941 tree int_ftype_v4di_v4di
21942 = build_function_type_list (integer_type_node,
21943 V4DI_type_node, V4DI_type_node,
21945 tree int_ftype_v4df_v4df
21946 = build_function_type_list (integer_type_node,
21947 V4DF_type_node, V4DF_type_node,
21949 tree v8sf_ftype_v8sf_v8si
21950 = build_function_type_list (V8SF_type_node,
21951 V8SF_type_node, V8SI_type_node,
21953 tree v4df_ftype_v4df_v4di
21954 = build_function_type_list (V4DF_type_node,
21955 V4DF_type_node, V4DI_type_node,
21957 tree v4sf_ftype_v4sf_v4si
21958 = build_function_type_list (V4SF_type_node,
21959 V4SF_type_node, V4SI_type_node, NULL_TREE);
21960 tree v2df_ftype_v2df_v2di
21961 = build_function_type_list (V2DF_type_node,
21962 V2DF_type_node, V2DI_type_node, NULL_TREE);
21966 /* Add all special builtins with variable number of operands. */
21967 for (i = 0, d = bdesc_special_args;
21968 i < ARRAY_SIZE (bdesc_special_args);
21976 switch ((enum ix86_special_builtin_type) d->flag)
21978 case VOID_FTYPE_VOID:
21979 type = void_ftype_void;
21981 case V32QI_FTYPE_PCCHAR:
21982 type = v32qi_ftype_pcchar;
21984 case V16QI_FTYPE_PCCHAR:
21985 type = v16qi_ftype_pcchar;
21987 case V8SF_FTYPE_PCV4SF:
21988 type = v8sf_ftype_pcv4sf;
21990 case V8SF_FTYPE_PCFLOAT:
21991 type = v8sf_ftype_pcfloat;
21993 case V4DF_FTYPE_PCV2DF:
21994 type = v4df_ftype_pcv2df;
21996 case V4DF_FTYPE_PCDOUBLE:
21997 type = v4df_ftype_pcdouble;
21999 case V4SF_FTYPE_PCFLOAT:
22000 type = v4sf_ftype_pcfloat;
22002 case V2DI_FTYPE_PV2DI:
22003 type = v2di_ftype_pv2di;
22005 case V2DF_FTYPE_PCDOUBLE:
22006 type = v2df_ftype_pcdouble;
22008 case V8SF_FTYPE_PCV8SF_V8SF:
22009 type = v8sf_ftype_pcv8sf_v8sf;
22011 case V4DF_FTYPE_PCV4DF_V4DF:
22012 type = v4df_ftype_pcv4df_v4df;
22014 case V4SF_FTYPE_V4SF_PCV2SF:
22015 type = v4sf_ftype_v4sf_pcv2sf;
22017 case V4SF_FTYPE_PCV4SF_V4SF:
22018 type = v4sf_ftype_pcv4sf_v4sf;
22020 case V2DF_FTYPE_V2DF_PCDOUBLE:
22021 type = v2df_ftype_v2df_pcdouble;
22023 case V2DF_FTYPE_PCV2DF_V2DF:
22024 type = v2df_ftype_pcv2df_v2df;
22026 case VOID_FTYPE_PV2SF_V4SF:
22027 type = void_ftype_pv2sf_v4sf;
22029 case VOID_FTYPE_PV2DI_V2DI:
22030 type = void_ftype_pv2di_v2di;
22032 case VOID_FTYPE_PCHAR_V32QI:
22033 type = void_ftype_pchar_v32qi;
22035 case VOID_FTYPE_PCHAR_V16QI:
22036 type = void_ftype_pchar_v16qi;
22038 case VOID_FTYPE_PFLOAT_V8SF:
22039 type = void_ftype_pfloat_v8sf;
22041 case VOID_FTYPE_PFLOAT_V4SF:
22042 type = void_ftype_pfloat_v4sf;
22044 case VOID_FTYPE_PDOUBLE_V4DF:
22045 type = void_ftype_pdouble_v4df;
22047 case VOID_FTYPE_PDOUBLE_V2DF:
22048 type = void_ftype_pdouble_v2df;
22050 case VOID_FTYPE_PDI_DI:
22051 type = void_ftype_pdi_di;
22053 case VOID_FTYPE_PINT_INT:
22054 type = void_ftype_pint_int;
22056 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22057 type = void_ftype_pv8sf_v8sf_v8sf;
22059 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22060 type = void_ftype_pv4df_v4df_v4df;
22062 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22063 type = void_ftype_pv4sf_v4sf_v4sf;
22065 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22066 type = void_ftype_pv2df_v2df_v2df;
22069 gcc_unreachable ();
22072 def_builtin (d->mask, d->name, type, d->code);
22075 /* Add all builtins with variable number of operands. */
22076 for (i = 0, d = bdesc_args;
22077 i < ARRAY_SIZE (bdesc_args);
22085 switch ((enum ix86_builtin_type) d->flag)
22087 case FLOAT_FTYPE_FLOAT:
22088 type = float_ftype_float;
22090 case INT_FTYPE_V8SF_V8SF_PTEST:
22091 type = int_ftype_v8sf_v8sf;
22093 case INT_FTYPE_V4DI_V4DI_PTEST:
22094 type = int_ftype_v4di_v4di;
22096 case INT_FTYPE_V4DF_V4DF_PTEST:
22097 type = int_ftype_v4df_v4df;
22099 case INT_FTYPE_V4SF_V4SF_PTEST:
22100 type = int_ftype_v4sf_v4sf;
22102 case INT_FTYPE_V2DI_V2DI_PTEST:
22103 type = int_ftype_v2di_v2di;
22105 case INT_FTYPE_V2DF_V2DF_PTEST:
22106 type = int_ftype_v2df_v2df;
22108 case INT64_FTYPE_V4SF:
22109 type = int64_ftype_v4sf;
22111 case INT64_FTYPE_V2DF:
22112 type = int64_ftype_v2df;
22114 case INT_FTYPE_V16QI:
22115 type = int_ftype_v16qi;
22117 case INT_FTYPE_V8QI:
22118 type = int_ftype_v8qi;
22120 case INT_FTYPE_V8SF:
22121 type = int_ftype_v8sf;
22123 case INT_FTYPE_V4DF:
22124 type = int_ftype_v4df;
22126 case INT_FTYPE_V4SF:
22127 type = int_ftype_v4sf;
22129 case INT_FTYPE_V2DF:
22130 type = int_ftype_v2df;
22132 case V16QI_FTYPE_V16QI:
22133 type = v16qi_ftype_v16qi;
22135 case V8SI_FTYPE_V8SF:
22136 type = v8si_ftype_v8sf;
22138 case V8SI_FTYPE_V4SI:
22139 type = v8si_ftype_v4si;
22141 case V8HI_FTYPE_V8HI:
22142 type = v8hi_ftype_v8hi;
22144 case V8HI_FTYPE_V16QI:
22145 type = v8hi_ftype_v16qi;
22147 case V8QI_FTYPE_V8QI:
22148 type = v8qi_ftype_v8qi;
22150 case V8SF_FTYPE_V8SF:
22151 type = v8sf_ftype_v8sf;
22153 case V8SF_FTYPE_V8SI:
22154 type = v8sf_ftype_v8si;
22156 case V8SF_FTYPE_V4SF:
22157 type = v8sf_ftype_v4sf;
22159 case V4SI_FTYPE_V4DF:
22160 type = v4si_ftype_v4df;
22162 case V4SI_FTYPE_V4SI:
22163 type = v4si_ftype_v4si;
22165 case V4SI_FTYPE_V16QI:
22166 type = v4si_ftype_v16qi;
22168 case V4SI_FTYPE_V8SI:
22169 type = v4si_ftype_v8si;
22171 case V4SI_FTYPE_V8HI:
22172 type = v4si_ftype_v8hi;
22174 case V4SI_FTYPE_V4SF:
22175 type = v4si_ftype_v4sf;
22177 case V4SI_FTYPE_V2DF:
22178 type = v4si_ftype_v2df;
22180 case V4HI_FTYPE_V4HI:
22181 type = v4hi_ftype_v4hi;
22183 case V4DF_FTYPE_V4DF:
22184 type = v4df_ftype_v4df;
22186 case V4DF_FTYPE_V4SI:
22187 type = v4df_ftype_v4si;
22189 case V4DF_FTYPE_V4SF:
22190 type = v4df_ftype_v4sf;
22192 case V4DF_FTYPE_V2DF:
22193 type = v4df_ftype_v2df;
22195 case V4SF_FTYPE_V4SF:
22196 case V4SF_FTYPE_V4SF_VEC_MERGE:
22197 type = v4sf_ftype_v4sf;
22199 case V4SF_FTYPE_V8SF:
22200 type = v4sf_ftype_v8sf;
22202 case V4SF_FTYPE_V4SI:
22203 type = v4sf_ftype_v4si;
22205 case V4SF_FTYPE_V4DF:
22206 type = v4sf_ftype_v4df;
22208 case V4SF_FTYPE_V2DF:
22209 type = v4sf_ftype_v2df;
22211 case V2DI_FTYPE_V2DI:
22212 type = v2di_ftype_v2di;
22214 case V2DI_FTYPE_V16QI:
22215 type = v2di_ftype_v16qi;
22217 case V2DI_FTYPE_V8HI:
22218 type = v2di_ftype_v8hi;
22220 case V2DI_FTYPE_V4SI:
22221 type = v2di_ftype_v4si;
22223 case V2SI_FTYPE_V2SI:
22224 type = v2si_ftype_v2si;
22226 case V2SI_FTYPE_V4SF:
22227 type = v2si_ftype_v4sf;
22229 case V2SI_FTYPE_V2DF:
22230 type = v2si_ftype_v2df;
22232 case V2SI_FTYPE_V2SF:
22233 type = v2si_ftype_v2sf;
22235 case V2DF_FTYPE_V4DF:
22236 type = v2df_ftype_v4df;
22238 case V2DF_FTYPE_V4SF:
22239 type = v2df_ftype_v4sf;
22241 case V2DF_FTYPE_V2DF:
22242 case V2DF_FTYPE_V2DF_VEC_MERGE:
22243 type = v2df_ftype_v2df;
22245 case V2DF_FTYPE_V2SI:
22246 type = v2df_ftype_v2si;
22248 case V2DF_FTYPE_V4SI:
22249 type = v2df_ftype_v4si;
22251 case V2SF_FTYPE_V2SF:
22252 type = v2sf_ftype_v2sf;
22254 case V2SF_FTYPE_V2SI:
22255 type = v2sf_ftype_v2si;
22257 case V16QI_FTYPE_V16QI_V16QI:
22258 type = v16qi_ftype_v16qi_v16qi;
22260 case V16QI_FTYPE_V8HI_V8HI:
22261 type = v16qi_ftype_v8hi_v8hi;
22263 case V8QI_FTYPE_V8QI_V8QI:
22264 type = v8qi_ftype_v8qi_v8qi;
22266 case V8QI_FTYPE_V4HI_V4HI:
22267 type = v8qi_ftype_v4hi_v4hi;
22269 case V8HI_FTYPE_V8HI_V8HI:
22270 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22271 type = v8hi_ftype_v8hi_v8hi;
22273 case V8HI_FTYPE_V16QI_V16QI:
22274 type = v8hi_ftype_v16qi_v16qi;
22276 case V8HI_FTYPE_V4SI_V4SI:
22277 type = v8hi_ftype_v4si_v4si;
22279 case V8HI_FTYPE_V8HI_SI_COUNT:
22280 type = v8hi_ftype_v8hi_int;
22282 case V8SF_FTYPE_V8SF_V8SF:
22283 type = v8sf_ftype_v8sf_v8sf;
22285 case V8SF_FTYPE_V8SF_V8SI:
22286 type = v8sf_ftype_v8sf_v8si;
22288 case V4SI_FTYPE_V4SI_V4SI:
22289 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22290 type = v4si_ftype_v4si_v4si;
22292 case V4SI_FTYPE_V8HI_V8HI:
22293 type = v4si_ftype_v8hi_v8hi;
22295 case V4SI_FTYPE_V4SF_V4SF:
22296 type = v4si_ftype_v4sf_v4sf;
22298 case V4SI_FTYPE_V2DF_V2DF:
22299 type = v4si_ftype_v2df_v2df;
22301 case V4SI_FTYPE_V4SI_SI_COUNT:
22302 type = v4si_ftype_v4si_int;
22304 case V4HI_FTYPE_V4HI_V4HI:
22305 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22306 type = v4hi_ftype_v4hi_v4hi;
22308 case V4HI_FTYPE_V8QI_V8QI:
22309 type = v4hi_ftype_v8qi_v8qi;
22311 case V4HI_FTYPE_V2SI_V2SI:
22312 type = v4hi_ftype_v2si_v2si;
22314 case V4HI_FTYPE_V4HI_SI_COUNT:
22315 type = v4hi_ftype_v4hi_int;
22317 case V4DF_FTYPE_V4DF_V4DF:
22318 type = v4df_ftype_v4df_v4df;
22320 case V4DF_FTYPE_V4DF_V4DI:
22321 type = v4df_ftype_v4df_v4di;
22323 case V4SF_FTYPE_V4SF_V4SF:
22324 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22325 type = v4sf_ftype_v4sf_v4sf;
22327 case V4SF_FTYPE_V4SF_V4SI:
22328 type = v4sf_ftype_v4sf_v4si;
22330 case V4SF_FTYPE_V4SF_V2SI:
22331 type = v4sf_ftype_v4sf_v2si;
22333 case V4SF_FTYPE_V4SF_V2DF:
22334 type = v4sf_ftype_v4sf_v2df;
22336 case V4SF_FTYPE_V4SF_DI:
22337 type = v4sf_ftype_v4sf_int64;
22339 case V4SF_FTYPE_V4SF_SI:
22340 type = v4sf_ftype_v4sf_int;
22342 case V2DI_FTYPE_V2DI_V2DI:
22343 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22344 type = v2di_ftype_v2di_v2di;
22346 case V2DI_FTYPE_V16QI_V16QI:
22347 type = v2di_ftype_v16qi_v16qi;
22349 case V2DI_FTYPE_V4SI_V4SI:
22350 type = v2di_ftype_v4si_v4si;
22352 case V2DI_FTYPE_V2DI_V16QI:
22353 type = v2di_ftype_v2di_v16qi;
22355 case V2DI_FTYPE_V2DF_V2DF:
22356 type = v2di_ftype_v2df_v2df;
22358 case V2DI_FTYPE_V2DI_SI_COUNT:
22359 type = v2di_ftype_v2di_int;
22361 case V2SI_FTYPE_V2SI_V2SI:
22362 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22363 type = v2si_ftype_v2si_v2si;
22365 case V2SI_FTYPE_V4HI_V4HI:
22366 type = v2si_ftype_v4hi_v4hi;
22368 case V2SI_FTYPE_V2SF_V2SF:
22369 type = v2si_ftype_v2sf_v2sf;
22371 case V2SI_FTYPE_V2SI_SI_COUNT:
22372 type = v2si_ftype_v2si_int;
22374 case V2DF_FTYPE_V2DF_V2DF:
22375 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22376 type = v2df_ftype_v2df_v2df;
22378 case V2DF_FTYPE_V2DF_V4SF:
22379 type = v2df_ftype_v2df_v4sf;
22381 case V2DF_FTYPE_V2DF_V2DI:
22382 type = v2df_ftype_v2df_v2di;
22384 case V2DF_FTYPE_V2DF_DI:
22385 type = v2df_ftype_v2df_int64;
22387 case V2DF_FTYPE_V2DF_SI:
22388 type = v2df_ftype_v2df_int;
22390 case V2SF_FTYPE_V2SF_V2SF:
22391 type = v2sf_ftype_v2sf_v2sf;
22393 case V1DI_FTYPE_V1DI_V1DI:
22394 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22395 type = v1di_ftype_v1di_v1di;
22397 case V1DI_FTYPE_V8QI_V8QI:
22398 type = v1di_ftype_v8qi_v8qi;
22400 case V1DI_FTYPE_V2SI_V2SI:
22401 type = v1di_ftype_v2si_v2si;
22403 case V1DI_FTYPE_V1DI_SI_COUNT:
22404 type = v1di_ftype_v1di_int;
22406 case UINT64_FTYPE_UINT64_UINT64:
22407 type = uint64_ftype_uint64_uint64;
22409 case UINT_FTYPE_UINT_UINT:
22410 type = unsigned_ftype_unsigned_unsigned;
22412 case UINT_FTYPE_UINT_USHORT:
22413 type = unsigned_ftype_unsigned_ushort;
22415 case UINT_FTYPE_UINT_UCHAR:
22416 type = unsigned_ftype_unsigned_uchar;
22418 case V8HI_FTYPE_V8HI_INT:
22419 type = v8hi_ftype_v8hi_int;
22421 case V8SF_FTYPE_V8SF_INT:
22422 type = v8sf_ftype_v8sf_int;
22424 case V4SI_FTYPE_V4SI_INT:
22425 type = v4si_ftype_v4si_int;
22427 case V4SI_FTYPE_V8SI_INT:
22428 type = v4si_ftype_v8si_int;
22430 case V4HI_FTYPE_V4HI_INT:
22431 type = v4hi_ftype_v4hi_int;
22433 case V4DF_FTYPE_V4DF_INT:
22434 type = v4df_ftype_v4df_int;
22436 case V4SF_FTYPE_V4SF_INT:
22437 type = v4sf_ftype_v4sf_int;
22439 case V4SF_FTYPE_V8SF_INT:
22440 type = v4sf_ftype_v8sf_int;
22442 case V2DI_FTYPE_V2DI_INT:
22443 case V2DI2TI_FTYPE_V2DI_INT:
22444 type = v2di_ftype_v2di_int;
22446 case V2DF_FTYPE_V2DF_INT:
22447 type = v2df_ftype_v2df_int;
22449 case V2DF_FTYPE_V4DF_INT:
22450 type = v2df_ftype_v4df_int;
22452 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22453 type = v16qi_ftype_v16qi_v16qi_v16qi;
22455 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22456 type = v8sf_ftype_v8sf_v8sf_v8sf;
22458 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22459 type = v4df_ftype_v4df_v4df_v4df;
22461 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22462 type = v4sf_ftype_v4sf_v4sf_v4sf;
22464 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22465 type = v2df_ftype_v2df_v2df_v2df;
22467 case V16QI_FTYPE_V16QI_V16QI_INT:
22468 type = v16qi_ftype_v16qi_v16qi_int;
22470 case V8SI_FTYPE_V8SI_V8SI_INT:
22471 type = v8si_ftype_v8si_v8si_int;
22473 case V8SI_FTYPE_V8SI_V4SI_INT:
22474 type = v8si_ftype_v8si_v4si_int;
22476 case V8HI_FTYPE_V8HI_V8HI_INT:
22477 type = v8hi_ftype_v8hi_v8hi_int;
22479 case V8SF_FTYPE_V8SF_V8SF_INT:
22480 type = v8sf_ftype_v8sf_v8sf_int;
22482 case V8SF_FTYPE_V8SF_V4SF_INT:
22483 type = v8sf_ftype_v8sf_v4sf_int;
22485 case V4SI_FTYPE_V4SI_V4SI_INT:
22486 type = v4si_ftype_v4si_v4si_int;
22488 case V4DF_FTYPE_V4DF_V4DF_INT:
22489 type = v4df_ftype_v4df_v4df_int;
22491 case V4DF_FTYPE_V4DF_V2DF_INT:
22492 type = v4df_ftype_v4df_v2df_int;
22494 case V4SF_FTYPE_V4SF_V4SF_INT:
22495 type = v4sf_ftype_v4sf_v4sf_int;
22497 case V2DI_FTYPE_V2DI_V2DI_INT:
22498 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22499 type = v2di_ftype_v2di_v2di_int;
22501 case V2DF_FTYPE_V2DF_V2DF_INT:
22502 type = v2df_ftype_v2df_v2df_int;
22504 case V2DI_FTYPE_V2DI_UINT_UINT:
22505 type = v2di_ftype_v2di_unsigned_unsigned;
22507 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22508 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22510 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22511 type = v1di_ftype_v1di_v1di_int;
22513 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
22514 type = v8sf_ftype_v8sf_v8sf_v8si_int;
22516 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
22517 type = v4df_ftype_v4df_v4df_v4di_int;
22519 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
22520 type = v4sf_ftype_v4sf_v4sf_v4si_int;
22522 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
22523 type = v2df_ftype_v2df_v2df_v2di_int;
22526 gcc_unreachable ();
22529 def_builtin_const (d->mask, d->name, type, d->code);
22532 /* pcmpestr[im] insns. */
22533 for (i = 0, d = bdesc_pcmpestr;
22534 i < ARRAY_SIZE (bdesc_pcmpestr);
22537 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22538 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22540 ftype = int_ftype_v16qi_int_v16qi_int_int;
22541 def_builtin_const (d->mask, d->name, ftype, d->code);
22544 /* pcmpistr[im] insns. */
22545 for (i = 0, d = bdesc_pcmpistr;
22546 i < ARRAY_SIZE (bdesc_pcmpistr);
22549 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22550 ftype = v16qi_ftype_v16qi_v16qi_int;
22552 ftype = int_ftype_v16qi_v16qi_int;
22553 def_builtin_const (d->mask, d->name, ftype, d->code);
22556 /* comi/ucomi insns. */
22557 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22558 if (d->mask == OPTION_MASK_ISA_SSE2)
22559 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22561 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22564 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22565 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22567 /* SSE or 3DNow!A */
22568 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22571 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22573 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22574 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22577 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22578 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22581 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22582 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22583 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22584 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22585 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
22586 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
22589 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
22592 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
22593 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
22595 /* Access to the vec_init patterns. */
22596 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
22597 integer_type_node, NULL_TREE);
22598 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
22600 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
22601 short_integer_type_node,
22602 short_integer_type_node,
22603 short_integer_type_node, NULL_TREE);
22604 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
22606 ftype = build_function_type_list (V8QI_type_node, char_type_node,
22607 char_type_node, char_type_node,
22608 char_type_node, char_type_node,
22609 char_type_node, char_type_node,
22610 char_type_node, NULL_TREE);
22611 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
22613 /* Access to the vec_extract patterns. */
22614 ftype = build_function_type_list (double_type_node, V2DF_type_node,
22615 integer_type_node, NULL_TREE);
22616 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
22618 ftype = build_function_type_list (long_long_integer_type_node,
22619 V2DI_type_node, integer_type_node,
22621 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
22623 ftype = build_function_type_list (float_type_node, V4SF_type_node,
22624 integer_type_node, NULL_TREE);
22625 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
22627 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
22628 integer_type_node, NULL_TREE);
22629 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
22631 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
22632 integer_type_node, NULL_TREE);
22633 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
22635 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
22636 integer_type_node, NULL_TREE);
22637 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
22639 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
22640 integer_type_node, NULL_TREE);
22641 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
22643 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
22644 integer_type_node, NULL_TREE);
22645 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
22647 /* Access to the vec_set patterns. */
22648 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
22650 integer_type_node, NULL_TREE);
22651 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
22653 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
22655 integer_type_node, NULL_TREE);
22656 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
22658 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
22660 integer_type_node, NULL_TREE);
22661 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
22663 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
22665 integer_type_node, NULL_TREE);
22666 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
22668 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
22670 integer_type_node, NULL_TREE);
22671 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
22673 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
22675 integer_type_node, NULL_TREE);
22676 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
22678 /* Add SSE5 multi-arg argument instructions */
22679 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22681 tree mtype = NULL_TREE;
22686 switch ((enum multi_arg_type)d->flag)
22688 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
22689 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
22690 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
22691 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
22692 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
22693 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
22694 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
22695 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
22696 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
22697 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
22698 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
22699 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
22700 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
22701 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
22702 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
22703 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
22704 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
22705 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
22706 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
22707 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
22708 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
22709 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
22710 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
22711 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
22712 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
22713 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
22714 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
22715 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
22716 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
22717 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
22718 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
22719 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
22720 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
22721 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
22722 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
22723 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
22724 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
22725 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
22726 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
22727 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
22728 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
22729 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
22730 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
22731 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
22732 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
22733 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
22734 case MULTI_ARG_UNKNOWN:
22736 gcc_unreachable ();
22740 def_builtin_const (d->mask, d->name, mtype, d->code);
22744 /* Internal method for ix86_init_builtins. */
/* Register the ms_abi and sysv_abi flavors of the va_start/va_end/va_copy
   builtins (__builtin_ms_va_* and __builtin_sysv_va_*), so both calling
   conventions can be used in one translation unit.  Internal helper for
   ix86_init_builtins.
   NOTE(review): several original lines (braces, some assignments) are
   elided from this chunk; code kept byte-identical.  */
22747 ix86_init_builtins_va_builtins_abi (void)
22749 tree ms_va_ref, sysv_va_ref;
22750 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22751 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22752 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22753 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to tag each builtin with its ABI.  */
22757 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22758 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
22759 ms_va_ref = build_reference_type (ms_va_list_type_node);
22761 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end takes a va_list reference, va_start
   is varargs, va_copy takes destination and source lists.  */
22764 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22765 fnvoid_va_start_ms =
22766 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22767 fnvoid_va_end_sysv =
22768 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22769 fnvoid_va_start_sysv =
22770 build_varargs_function_type_list (void_type_node, sysv_va_ref,
22772 fnvoid_va_copy_ms =
22773 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22775 fnvoid_va_copy_sysv =
22776 build_function_type_list (void_type_node, sysv_va_ref,
22777 sysv_va_ref, NULL_TREE);
/* Register each builtin against the generic BUILT_IN_VA_* code, carrying
   the ABI attribute built above.  */
22779 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22780 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22781 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22782 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22783 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22784 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22785 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22786 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22787 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22788 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22789 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22790 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 backend: registers the
   __float80/__float128 types, the TFmode helper builtins (__builtin_infq,
   __builtin_fabsq, __builtin_copysignq), then delegates to the MMX/SSE and
   va_list-ABI helpers.  */
22794 ix86_init_builtins (void)
22796 tree float128_type_node = make_node (REAL_TYPE);
22799 /* The __float80 type. */
22800 if (TYPE_MODE (long_double_type_node) == XFmode)
/* long double already is the 80-bit type; just register it under the
   builtin name.  */
22801 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
22805 /* The __float80 type. */
/* Otherwise build a distinct 80-bit REAL_TYPE and register it.  */
22806 tree float80_type_node = make_node (REAL_TYPE);
22808 TYPE_PRECISION (float80_type_node) = 80;
22809 layout_type (float80_type_node);
22810 (*lang_hooks.types.register_builtin_type) (float80_type_node,
22814 /* The __float128 type. */
22815 TYPE_PRECISION (float128_type_node) = 128;
22816 layout_type (float128_type_node);
22817 (*lang_hooks.types.register_builtin_type) (float128_type_node,
22820 /* TFmode support builtins. */
22821 ftype = build_function_type (float128_type_node, void_list_node);
22822 decl = add_builtin_function ("__builtin_infq", ftype,
22823 IX86_BUILTIN_INFQ, BUILT_IN_MD,
22825 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
22827 /* We will expand them to normal call if SSE2 isn't available since
22828 they are used by libgcc. */
22829 ftype = build_function_type_list (float128_type_node,
22830 float128_type_node,
22832 decl = add_builtin_function ("__builtin_fabsq", ftype,
22833 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
22834 "__fabstf2", NULL_TREE);
22835 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure computation: mark as not writing memory.  */
22836 TREE_READONLY (decl) = 1;
22838 ftype = build_function_type_list (float128_type_node,
22839 float128_type_node,
22840 float128_type_node,
22842 decl = add_builtin_function ("__builtin_copysignq", ftype,
22843 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
22844 "__copysigntf3", NULL_TREE);
22845 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
22846 TREE_READONLY (decl) = 1;
22848 ix86_init_mmx_sse_builtins ();
22850 ix86_init_builtins_va_builtins_abi ();
22850 ix86_init_builtins_va_builtins_abi ();
22853 /* Errors in the source file can cause expand_expr to return const0_rtx
22854 where we expect a vector. To avoid crashing, use one of the vector
22855 clear instructions. */
/* Replace a scalar const0_rtx (produced by expand_expr after source
   errors) with the zero constant of vector mode MODE, so the insn
   patterns below never see a non-vector operand.  */
22857 safe_vector_operand (rtx x, enum machine_mode mode)
22859 if (x == const0_rtx)
22860 x = CONST0_RTX (mode);
22864 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin call EXP into insn ICODE, forcing operands
   into registers as the pattern's predicates require and allocating TARGET
   when it is absent or of the wrong mode.  */
22867 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22870 tree arg0 = CALL_EXPR_ARG (exp, 0);
22871 tree arg1 = CALL_EXPR_ARG (exp, 1);
22872 rtx op0 = expand_normal (arg0);
22873 rtx op1 = expand_normal (arg1);
22874 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22875 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22876 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx from erroneous source (see
   safe_vector_operand).  */
22878 if (VECTOR_MODE_P (mode0))
22879 op0 = safe_vector_operand (op0, mode0);
22880 if (VECTOR_MODE_P (mode1))
22881 op1 = safe_vector_operand (op1, mode1);
22883 if (optimize || !target
22884 || GET_MODE (target) != tmode
22885 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22886 target = gen_reg_rtx (tmode);
/* An SImode second operand feeding a TImode pattern is widened by
   loading it into a V4SI register and taking the TImode lowpart.  */
22888 if (GET_MODE (op1) == SImode && mode1 == TImode)
22890 rtx x = gen_reg_rtx (V4SImode);
22891 emit_insn (gen_sse2_loadd (x, op1));
22892 op1 = gen_lowpart (TImode, x);
22895 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22896 op0 = copy_to_mode_reg (mode0, op0);
22897 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22898 op1 = copy_to_mode_reg (mode1, op1);
22900 pat = GEN_FCN (icode) (target, op0, op1);
22909 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin call EXP (2-4 operands) into insn
   ICODE.  M_TYPE selects the operand layout; SUB_CODE carries either an
   extra opcode immediate or, for the _CMP forms, the rtx comparison code
   inserted as an operand.
   NOTE(review): the nargs assignments and several braces/breaks for the
   case groups are elided from this chunk; code kept byte-identical.  */
22912 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22913 enum multi_arg_type m_type,
22914 enum insn_code sub_code)
22919 bool comparison_p = false;
22921 bool last_arg_constant = false;
22922 int num_memory = 0;
22925 enum machine_mode mode;
22928 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
22932 case MULTI_ARG_3_SF:
22933 case MULTI_ARG_3_DF:
22934 case MULTI_ARG_3_DI:
22935 case MULTI_ARG_3_SI:
22936 case MULTI_ARG_3_SI_DI:
22937 case MULTI_ARG_3_HI:
22938 case MULTI_ARG_3_HI_SI:
22939 case MULTI_ARG_3_QI:
22940 case MULTI_ARG_3_PERMPS:
22941 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
22945 case MULTI_ARG_2_SF:
22946 case MULTI_ARG_2_DF:
22947 case MULTI_ARG_2_DI:
22948 case MULTI_ARG_2_SI:
22949 case MULTI_ARG_2_HI:
22950 case MULTI_ARG_2_QI:
/* Two operands where the last must be an immediate.  */
22954 case MULTI_ARG_2_DI_IMM:
22955 case MULTI_ARG_2_SI_IMM:
22956 case MULTI_ARG_2_HI_IMM:
22957 case MULTI_ARG_2_QI_IMM:
22959 last_arg_constant = true;
/* One-operand forms (including widening conversions).  */
22962 case MULTI_ARG_1_SF:
22963 case MULTI_ARG_1_DF:
22964 case MULTI_ARG_1_DI:
22965 case MULTI_ARG_1_SI:
22966 case MULTI_ARG_1_HI:
22967 case MULTI_ARG_1_QI:
22968 case MULTI_ARG_1_SI_DI:
22969 case MULTI_ARG_1_HI_DI:
22970 case MULTI_ARG_1_HI_SI:
22971 case MULTI_ARG_1_QI_DI:
22972 case MULTI_ARG_1_QI_SI:
22973 case MULTI_ARG_1_QI_HI:
22974 case MULTI_ARG_1_PH2PS:
22975 case MULTI_ARG_1_PS2PH:
/* Comparisons: SUB_CODE is the rtx comparison code.  */
22979 case MULTI_ARG_2_SF_CMP:
22980 case MULTI_ARG_2_DF_CMP:
22981 case MULTI_ARG_2_DI_CMP:
22982 case MULTI_ARG_2_SI_CMP:
22983 case MULTI_ARG_2_HI_CMP:
22984 case MULTI_ARG_2_QI_CMP:
22986 comparison_p = true;
/* Test forms: SUB_CODE passed as an integer operand.  */
22989 case MULTI_ARG_2_SF_TF:
22990 case MULTI_ARG_2_DF_TF:
22991 case MULTI_ARG_2_DI_TF:
22992 case MULTI_ARG_2_SI_TF:
22993 case MULTI_ARG_2_HI_TF:
22994 case MULTI_ARG_2_QI_TF:
22999 case MULTI_ARG_UNKNOWN:
23001 gcc_unreachable ();
23004 if (optimize || !target
23005 || GET_MODE (target) != tmode
23006 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23007 target = gen_reg_rtx (tmode);
23009 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument; comparisons shift the operand
   index by one because the comparison rtx occupies operand 1.  */
23011 for (i = 0; i < nargs; i++)
23013 tree arg = CALL_EXPR_ARG (exp, i);
23014 rtx op = expand_normal (arg);
23015 int adjust = (comparison_p) ? 1 : 0;
23016 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23018 if (last_arg_constant && i == nargs-1)
23020 if (GET_CODE (op) != CONST_INT)
23022 error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
23023 return gen_reg_rtx (tmode);
23028 if (VECTOR_MODE_P (mode))
23029 op = safe_vector_operand (op, mode);
23031 /* If we aren't optimizing, only allow one memory operand to be
23033 if (memory_operand (op, mode))
23036 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23039 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23041 op = force_reg (mode, op);
23045 args[i].mode = mode;
/* Emit the pattern with the operand count/shape chosen above.  */
23051 pat = GEN_FCN (icode) (target, args[0].op);
23056 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23057 GEN_INT ((int)sub_code));
23058 else if (! comparison_p)
23059 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23062 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23066 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23071 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23075 gcc_unreachable ();
23085 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23086 insns with vec_merge. */
/* Expand a scalar unary builtin whose pattern is a vec_merge: operand 1
   is the source, operand 2 repeats it (op1 is set from op0 on an elided
   line), so the insn merges the computed scalar into the source vector.  */
23089 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23093 tree arg0 = CALL_EXPR_ARG (exp, 0);
23094 rtx op1, op0 = expand_normal (arg0);
23095 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23096 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23098 if (optimize || !target
23099 || GET_MODE (target) != tmode
23100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23101 target = gen_reg_rtx (tmode);
23103 if (VECTOR_MODE_P (mode0))
23104 op0 = safe_vector_operand (op0, mode0);
23106 if ((optimize && !register_operand (op0, mode0))
23107 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23108 op0 = copy_to_mode_reg (mode0, op0);
/* NOTE(review): the assignment of op1 (presumably op1 = op0) sits on an
   elided line before this predicate check — confirm in full source.  */
23111 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23112 op1 = copy_to_mode_reg (mode0, op1);
23114 pat = GEN_FCN (icode) (target, op0, op1);
23121 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE vector comparison builtin described by D.  The emitted
   pattern takes both operands plus an rtx of D->comparison applied to
   them; SWAP exchanges the operands first for comparisons only available
   in the reversed form.  */
23124 ix86_expand_sse_compare (const struct builtin_description *d,
23125 tree exp, rtx target, bool swap)
23128 tree arg0 = CALL_EXPR_ARG (exp, 0);
23129 tree arg1 = CALL_EXPR_ARG (exp, 1);
23130 rtx op0 = expand_normal (arg0);
23131 rtx op1 = expand_normal (arg1);
23133 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23134 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23135 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23136 enum rtx_code comparison = d->comparison;
23138 if (VECTOR_MODE_P (mode0))
23139 op0 = safe_vector_operand (op0, mode0);
23140 if (VECTOR_MODE_P (mode1))
23141 op1 = safe_vector_operand (op1, mode1);
23143 /* Swap operands if we have a comparison that isn't available in
/* Copy through a fresh register so the swap does not clobber a value
   another use might still need.  */
23147 rtx tmp = gen_reg_rtx (mode1);
23148 emit_move_insn (tmp, op1);
23153 if (optimize || !target
23154 || GET_MODE (target) != tmode
23155 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23156 target = gen_reg_rtx (tmode);
23158 if ((optimize && !register_operand (op0, mode0))
23159 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23160 op0 = copy_to_mode_reg (mode0, op0);
23161 if ((optimize && !register_operand (op1, mode1))
23162 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23163 op1 = copy_to_mode_reg (mode1, op1);
23165 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23166 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23173 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi scalar-compare builtin: emit the compare insn, then
   materialize D->comparison on the resulting flags into the low byte of an
   SImode register (pre-zeroed so the upper bits are defined) and return
   that register.  */
23176 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23180 tree arg0 = CALL_EXPR_ARG (exp, 0);
23181 tree arg1 = CALL_EXPR_ARG (exp, 1);
23182 rtx op0 = expand_normal (arg0);
23183 rtx op1 = expand_normal (arg1);
23184 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23185 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23186 enum rtx_code comparison = d->comparison;
23188 if (VECTOR_MODE_P (mode0))
23189 op0 = safe_vector_operand (op0, mode0);
23190 if (VECTOR_MODE_P (mode1))
23191 op1 = safe_vector_operand (op1, mode1);
23193 /* Swap operands if we have a comparison that isn't available in
23195 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero SImode, then write only the low QImode part.  */
23202 target = gen_reg_rtx (SImode);
23203 emit_move_insn (target, const0_rtx);
23204 target = gen_rtx_SUBREG (QImode, target, 0);
23206 if ((optimize && !register_operand (op0, mode0))
23207 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23208 op0 = copy_to_mode_reg (mode0, op0);
23209 if ((optimize && !register_operand (op1, mode1))
23210 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23211 op1 = copy_to_mode_reg (mode1, op1);
23213 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc-style store of the flag condition into the low byte.  */
23217 emit_insn (gen_rtx_SET (VOIDmode,
23218 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23219 gen_rtx_fmt_ee (comparison, QImode,
23223 return SUBREG_REG (target);
23226 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest builtin: emit the test insn on the two vector operands,
   then capture D->comparison of the resulting flags in the low byte of a
   zeroed SImode register and return it — same setcc scheme as
   ix86_expand_sse_comi.  */
23229 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23233 tree arg0 = CALL_EXPR_ARG (exp, 0);
23234 tree arg1 = CALL_EXPR_ARG (exp, 1);
23235 rtx op0 = expand_normal (arg0);
23236 rtx op1 = expand_normal (arg1);
23237 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23238 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23239 enum rtx_code comparison = d->comparison;
23241 if (VECTOR_MODE_P (mode0))
23242 op0 = safe_vector_operand (op0, mode0);
23243 if (VECTOR_MODE_P (mode1))
23244 op1 = safe_vector_operand (op1, mode1);
/* Result register: zero SImode, then write only the low QImode part.  */
23246 target = gen_reg_rtx (SImode);
23247 emit_move_insn (target, const0_rtx);
23248 target = gen_rtx_SUBREG (QImode, target, 0);
23250 if ((optimize && !register_operand (op0, mode0))
23251 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23252 op0 = copy_to_mode_reg (mode0, op0);
23253 if ((optimize && !register_operand (op1, mode1))
23254 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23255 op1 = copy_to_mode_reg (mode1, op1);
23257 pat = GEN_FCN (d->icode) (op0, op1);
23261 emit_insn (gen_rtx_SET (VOIDmode,
23262 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23263 gen_rtx_fmt_ee (comparison, QImode,
23267 return SUBREG_REG (target);
23270 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand an SSE4.2 pcmpestr[im] builtin (explicit-length string compare).
   Args: two data vectors, their two lengths, and an 8-bit immediate mode.
   The underlying pattern has two outputs (index and mask); depending on
   D->code we return one of them, or — for the flag-extracting variants —
   a QImode setcc of the flag register named by D->flag.  */
23273 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23274 tree exp, rtx target)
23277 tree arg0 = CALL_EXPR_ARG (exp, 0);
23278 tree arg1 = CALL_EXPR_ARG (exp, 1);
23279 tree arg2 = CALL_EXPR_ARG (exp, 2);
23280 tree arg3 = CALL_EXPR_ARG (exp, 3);
23281 tree arg4 = CALL_EXPR_ARG (exp, 4);
23282 rtx scratch0, scratch1;
23283 rtx op0 = expand_normal (arg0);
23284 rtx op1 = expand_normal (arg1);
23285 rtx op2 = expand_normal (arg2);
23286 rtx op3 = expand_normal (arg3);
23287 rtx op4 = expand_normal (arg4);
23288 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: 0/1 outputs, 2/4 vectors, 3/5 lengths, 6 immediate.  */
23290 tmode0 = insn_data[d->icode].operand[0].mode;
23291 tmode1 = insn_data[d->icode].operand[1].mode;
23292 modev2 = insn_data[d->icode].operand[2].mode;
23293 modei3 = insn_data[d->icode].operand[3].mode;
23294 modev4 = insn_data[d->icode].operand[4].mode;
23295 modei5 = insn_data[d->icode].operand[5].mode;
23296 modeimm = insn_data[d->icode].operand[6].mode;
23298 if (VECTOR_MODE_P (modev2))
23299 op0 = safe_vector_operand (op0, modev2);
23300 if (VECTOR_MODE_P (modev4))
23301 op2 = safe_vector_operand (op2, modev4);
23303 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23304 op0 = copy_to_mode_reg (modev2, op0);
23305 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23306 op1 = copy_to_mode_reg (modei3, op1);
23307 if ((optimize && !register_operand (op2, modev4))
23308 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23309 op2 = copy_to_mode_reg (modev4, op2);
23310 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23311 op3 = copy_to_mode_reg (modei5, op3);
23313 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23315 error ("the fifth argument must be a 8-bit immediate")&#x3B;
/* pcmpestri: index output is the result; mask output is scratch.  */
23319 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23321 if (optimize || !target
23322 || GET_MODE (target) != tmode0
23323 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23324 target = gen_reg_rtx (tmode0);
23326 scratch1 = gen_reg_rtx (tmode1);
23328 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: mask output is the result; index output is scratch.  */
23330 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23332 if (optimize || !target
23333 || GET_MODE (target) != tmode1
23334 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23335 target = gen_reg_rtx (tmode1);
23337 scratch0 = gen_reg_rtx (tmode0);
23339 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs are scratch; D->flag names the flags reg.  */
23343 gcc_assert (d->flag);
23345 scratch0 = gen_reg_rtx (tmode0);
23346 scratch1 = gen_reg_rtx (tmode1);
23348 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23358 target = gen_reg_rtx (SImode);
23359 emit_move_insn (target, const0_rtx);
23360 target = gen_rtx_SUBREG (QImode, target, 0);
23363 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23364 gen_rtx_fmt_ee (EQ, QImode,
23365 gen_rtx_REG ((enum machine_mode) d->flag,
23368 return SUBREG_REG (target);
23375 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand an SSE4.2 pcmpistr[im] builtin (implicit-length string compare).
   Same structure as ix86_expand_sse_pcmpestr but with only two data
   vectors plus the 8-bit immediate — no explicit lengths.  */
23378 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23379 tree exp, rtx target)
23382 tree arg0 = CALL_EXPR_ARG (exp, 0);
23383 tree arg1 = CALL_EXPR_ARG (exp, 1);
23384 tree arg2 = CALL_EXPR_ARG (exp, 2);
23385 rtx scratch0, scratch1;
23386 rtx op0 = expand_normal (arg0);
23387 rtx op1 = expand_normal (arg1);
23388 rtx op2 = expand_normal (arg2);
23389 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: 0/1 outputs, 2/3 vectors, 4 immediate.  */
23391 tmode0 = insn_data[d->icode].operand[0].mode;
23392 tmode1 = insn_data[d->icode].operand[1].mode;
23393 modev2 = insn_data[d->icode].operand[2].mode;
23394 modev3 = insn_data[d->icode].operand[3].mode;
23395 modeimm = insn_data[d->icode].operand[4].mode;
23397 if (VECTOR_MODE_P (modev2))
23398 op0 = safe_vector_operand (op0, modev2);
23399 if (VECTOR_MODE_P (modev3))
23400 op1 = safe_vector_operand (op1, modev3);
23402 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23403 op0 = copy_to_mode_reg (modev2, op0);
23404 if ((optimize && !register_operand (op1, modev3))
23405 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23406 op1 = copy_to_mode_reg (modev3, op1);
23408 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23410 error ("the third argument must be a 8-bit immediate")&#x3B;
/* pcmpistri: index output is the result; mask output is scratch.  */
23414 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23416 if (optimize || !target
23417 || GET_MODE (target) != tmode0
23418 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23419 target = gen_reg_rtx (tmode0);
23421 scratch1 = gen_reg_rtx (tmode1);
23423 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: mask output is the result; index output is scratch.  */
23425 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23427 if (optimize || !target
23428 || GET_MODE (target) != tmode1
23429 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23430 target = gen_reg_rtx (tmode1);
23432 scratch0 = gen_reg_rtx (tmode0);
23434 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both outputs are scratch; D->flag names the flags reg.  */
23438 gcc_assert (d->flag);
23440 scratch0 = gen_reg_rtx (tmode0);
23441 scratch1 = gen_reg_rtx (tmode1);
23443 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23453 target = gen_reg_rtx (SImode);
23454 emit_move_insn (target, const0_rtx);
23455 target = gen_rtx_SUBREG (QImode, target, 0);
23458 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23459 gen_rtx_fmt_ee (EQ, QImode,
23460 gen_rtx_REG ((enum machine_mode) d->flag,
23463 return SUBREG_REG (target);
23469 /* Subroutine of ix86_expand_builtin to take care of insns with
23470 variable number of operands. */
/* General expander for builtins with a variable number of operands.
   Dispatches on the ix86_builtin_type in D->flag: ptest/compare/vec_merge
   forms are delegated to their dedicated expanders; the rest set nargs,
   immediate-operand count, and flags on elided lines, then fall into the
   common argument-legitimization loop and GEN_FCN emission.
   NOTE(review): many nargs/rmode assignments and braces are elided from
   this chunk; code kept byte-identical.  */
23473 ix86_expand_args_builtin (const struct builtin_description *d,
23474 tree exp, rtx target)
23476 rtx pat, real_target;
23477 unsigned int i, nargs;
23478 unsigned int nargs_constant = 0;
23479 int num_memory = 0;
23483 enum machine_mode mode;
23485 bool last_arg_count = false;
23486 enum insn_code icode = d->icode;
23487 const struct insn_data *insn_p = &insn_data[icode];
23488 enum machine_mode tmode = insn_p->operand[0].mode;
23489 enum machine_mode rmode = VOIDmode;
23491 enum rtx_code comparison = d->comparison;
23493 switch ((enum ix86_builtin_type) d->flag)
/* ptest-style: delegate entirely.  */
23495 case INT_FTYPE_V8SF_V8SF_PTEST:
23496 case INT_FTYPE_V4DI_V4DI_PTEST:
23497 case INT_FTYPE_V4DF_V4DF_PTEST:
23498 case INT_FTYPE_V4SF_V4SF_PTEST:
23499 case INT_FTYPE_V2DI_V2DI_PTEST:
23500 case INT_FTYPE_V2DF_V2DF_PTEST:
23501 return ix86_expand_sse_ptest (d, exp, target);
/* Unary forms.  */
23502 case FLOAT128_FTYPE_FLOAT128:
23503 case FLOAT_FTYPE_FLOAT:
23504 case INT64_FTYPE_V4SF:
23505 case INT64_FTYPE_V2DF:
23506 case INT_FTYPE_V16QI:
23507 case INT_FTYPE_V8QI:
23508 case INT_FTYPE_V8SF:
23509 case INT_FTYPE_V4DF:
23510 case INT_FTYPE_V4SF:
23511 case INT_FTYPE_V2DF:
23512 case V16QI_FTYPE_V16QI:
23513 case V8SI_FTYPE_V8SF:
23514 case V8SI_FTYPE_V4SI:
23515 case V8HI_FTYPE_V8HI:
23516 case V8HI_FTYPE_V16QI:
23517 case V8QI_FTYPE_V8QI:
23518 case V8SF_FTYPE_V8SF:
23519 case V8SF_FTYPE_V8SI:
23520 case V8SF_FTYPE_V4SF:
23521 case V4SI_FTYPE_V4SI:
23522 case V4SI_FTYPE_V16QI:
23523 case V4SI_FTYPE_V4SF:
23524 case V4SI_FTYPE_V8SI:
23525 case V4SI_FTYPE_V8HI:
23526 case V4SI_FTYPE_V4DF:
23527 case V4SI_FTYPE_V2DF:
23528 case V4HI_FTYPE_V4HI:
23529 case V4DF_FTYPE_V4DF:
23530 case V4DF_FTYPE_V4SI:
23531 case V4DF_FTYPE_V4SF:
23532 case V4DF_FTYPE_V2DF:
23533 case V4SF_FTYPE_V4SF:
23534 case V4SF_FTYPE_V4SI:
23535 case V4SF_FTYPE_V8SF:
23536 case V4SF_FTYPE_V4DF:
23537 case V4SF_FTYPE_V2DF:
23538 case V2DI_FTYPE_V2DI:
23539 case V2DI_FTYPE_V16QI:
23540 case V2DI_FTYPE_V8HI:
23541 case V2DI_FTYPE_V4SI:
23542 case V2DF_FTYPE_V2DF:
23543 case V2DF_FTYPE_V4SI:
23544 case V2DF_FTYPE_V4DF:
23545 case V2DF_FTYPE_V4SF:
23546 case V2DF_FTYPE_V2SI:
23547 case V2SI_FTYPE_V2SI:
23548 case V2SI_FTYPE_V4SF:
23549 case V2SI_FTYPE_V2SF:
23550 case V2SI_FTYPE_V2DF:
23551 case V2SF_FTYPE_V2SF:
23552 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge: delegate.  */
23555 case V4SF_FTYPE_V4SF_VEC_MERGE:
23556 case V2DF_FTYPE_V2DF_VEC_MERGE:
23557 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary forms; comparisons fall through to the compare expander.  */
23558 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23559 case V16QI_FTYPE_V16QI_V16QI:
23560 case V16QI_FTYPE_V8HI_V8HI:
23561 case V8QI_FTYPE_V8QI_V8QI:
23562 case V8QI_FTYPE_V4HI_V4HI:
23563 case V8HI_FTYPE_V8HI_V8HI:
23564 case V8HI_FTYPE_V16QI_V16QI:
23565 case V8HI_FTYPE_V4SI_V4SI:
23566 case V8SF_FTYPE_V8SF_V8SF:
23567 case V8SF_FTYPE_V8SF_V8SI:
23568 case V4SI_FTYPE_V4SI_V4SI:
23569 case V4SI_FTYPE_V8HI_V8HI:
23570 case V4SI_FTYPE_V4SF_V4SF:
23571 case V4SI_FTYPE_V2DF_V2DF:
23572 case V4HI_FTYPE_V4HI_V4HI:
23573 case V4HI_FTYPE_V8QI_V8QI:
23574 case V4HI_FTYPE_V2SI_V2SI:
23575 case V4DF_FTYPE_V4DF_V4DF:
23576 case V4DF_FTYPE_V4DF_V4DI:
23577 case V4SF_FTYPE_V4SF_V4SF:
23578 case V4SF_FTYPE_V4SF_V4SI:
23579 case V4SF_FTYPE_V4SF_V2SI:
23580 case V4SF_FTYPE_V4SF_V2DF:
23581 case V4SF_FTYPE_V4SF_DI:
23582 case V4SF_FTYPE_V4SF_SI:
23583 case V2DI_FTYPE_V2DI_V2DI:
23584 case V2DI_FTYPE_V16QI_V16QI:
23585 case V2DI_FTYPE_V4SI_V4SI:
23586 case V2DI_FTYPE_V2DI_V16QI:
23587 case V2DI_FTYPE_V2DF_V2DF:
23588 case V2SI_FTYPE_V2SI_V2SI:
23589 case V2SI_FTYPE_V4HI_V4HI:
23590 case V2SI_FTYPE_V2SF_V2SF:
23591 case V2DF_FTYPE_V2DF_V2DF:
23592 case V2DF_FTYPE_V2DF_V4SF:
23593 case V2DF_FTYPE_V2DF_V2DI:
23594 case V2DF_FTYPE_V2DF_DI:
23595 case V2DF_FTYPE_V2DF_SI:
23596 case V2SF_FTYPE_V2SF_V2SF:
23597 case V1DI_FTYPE_V1DI_V1DI:
23598 case V1DI_FTYPE_V8QI_V8QI:
23599 case V1DI_FTYPE_V2SI_V2SI:
23600 if (comparison == UNKNOWN)
23601 return ix86_expand_binop_builtin (icode, exp, target);
/* Compare with swapped operands.  */
23604 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23605 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23606 gcc_assert (comparison != UNKNOWN);
/* Shift forms: last argument is the count.  */
23610 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23611 case V8HI_FTYPE_V8HI_SI_COUNT:
23612 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23613 case V4SI_FTYPE_V4SI_SI_COUNT:
23614 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23615 case V4HI_FTYPE_V4HI_SI_COUNT:
23616 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23617 case V2DI_FTYPE_V2DI_SI_COUNT:
23618 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23619 case V2SI_FTYPE_V2SI_SI_COUNT:
23620 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23621 case V1DI_FTYPE_V1DI_SI_COUNT:
23623 last_arg_count = true;
23625 case UINT64_FTYPE_UINT64_UINT64:
23626 case UINT_FTYPE_UINT_UINT:
23627 case UINT_FTYPE_UINT_USHORT:
23628 case UINT_FTYPE_UINT_UCHAR:
/* Forms whose trailing argument(s) must be immediates.  */
23631 case V2DI2TI_FTYPE_V2DI_INT:
23634 nargs_constant = 1;
23636 case V8HI_FTYPE_V8HI_INT:
23637 case V8SF_FTYPE_V8SF_INT:
23638 case V4SI_FTYPE_V4SI_INT:
23639 case V4SI_FTYPE_V8SI_INT:
23640 case V4HI_FTYPE_V4HI_INT:
23641 case V4DF_FTYPE_V4DF_INT:
23642 case V4SF_FTYPE_V4SF_INT:
23643 case V4SF_FTYPE_V8SF_INT:
23644 case V2DI_FTYPE_V2DI_INT:
23645 case V2DF_FTYPE_V2DF_INT:
23646 case V2DF_FTYPE_V4DF_INT:
23648 nargs_constant = 1;
/* Three-operand forms.  */
23650 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23651 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23652 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23653 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23654 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23657 case V16QI_FTYPE_V16QI_V16QI_INT:
23658 case V8HI_FTYPE_V8HI_V8HI_INT:
23659 case V8SI_FTYPE_V8SI_V8SI_INT:
23660 case V8SI_FTYPE_V8SI_V4SI_INT:
23661 case V8SF_FTYPE_V8SF_V8SF_INT:
23662 case V8SF_FTYPE_V8SF_V4SF_INT:
23663 case V4SI_FTYPE_V4SI_V4SI_INT:
23664 case V4DF_FTYPE_V4DF_V4DF_INT:
23665 case V4DF_FTYPE_V4DF_V2DF_INT:
23666 case V4SF_FTYPE_V4SF_V4SF_INT:
23667 case V2DI_FTYPE_V2DI_V2DI_INT:
23668 case V2DF_FTYPE_V2DF_V2DF_INT:
23670 nargs_constant = 1;
23672 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23675 nargs_constant = 1;
23677 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23680 nargs_constant = 1;
23682 case V2DI_FTYPE_V2DI_UINT_UINT:
23684 nargs_constant = 2;
23686 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23687 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23688 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23689 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23691 nargs_constant = 1;
23693 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23695 nargs_constant = 2;
23698 gcc_unreachable ();
23701 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are handled by the dedicated compare expander.  */
23703 if (comparison != UNKNOWN)
23705 gcc_assert (nargs == 2);
23706 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the pattern's result mode differs from the builtin's (rmode),
   emit into an rmode register and view it through a tmode subreg.  */
23709 if (rmode == VOIDmode || rmode == tmode)
23713 || GET_MODE (target) != tmode
23714 || ! (*insn_p->operand[0].predicate) (target, tmode))
23715 target = gen_reg_rtx (tmode);
23716 real_target = target;
23720 target = gen_reg_rtx (rmode);
23721 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Legitimize each argument against the pattern's operand predicate.  */
23724 for (i = 0; i < nargs; i++)
23726 tree arg = CALL_EXPR_ARG (exp, i);
23727 rtx op = expand_normal (arg);
23728 enum machine_mode mode = insn_p->operand[i + 1].mode;
23729 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23731 if (last_arg_count && (i + 1) == nargs)
23733 /* SIMD shift insns take either an 8-bit immediate or
23734 register as count. But builtin functions take int as
23735 count. If count doesn't match, we put it in register. */
23738 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23739 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23740 op = copy_to_reg (op);
/* Trailing arguments that must be immediates: diagnose with the
   bit-width required by the specific pattern.  */
23743 else if ((nargs - i) <= nargs_constant)
23748 case CODE_FOR_sse4_1_roundpd:
23749 case CODE_FOR_sse4_1_roundps:
23750 case CODE_FOR_sse4_1_roundsd:
23751 case CODE_FOR_sse4_1_roundss:
23752 case CODE_FOR_sse4_1_blendps:
23753 case CODE_FOR_avx_blendpd256:
23754 case CODE_FOR_avx_vpermilv4df:
23755 case CODE_FOR_avx_roundpd256:
23756 case CODE_FOR_avx_roundps256:
23757 error ("the last argument must be a 4-bit immediate")&#x3B;
23760 case CODE_FOR_sse4_1_blendpd:
23761 case CODE_FOR_avx_vpermilv2df:
23762 case CODE_FOR_avx_vpermil2v2df3:
23763 case CODE_FOR_avx_vpermil2v4sf3:
23764 case CODE_FOR_avx_vpermil2v4df3:
23765 case CODE_FOR_avx_vpermil2v8sf3:
23766 error ("the last argument must be a 2-bit immediate")&#x3B;
23769 case CODE_FOR_avx_vextractf128v4df:
23770 case CODE_FOR_avx_vextractf128v8sf:
23771 case CODE_FOR_avx_vextractf128v8si:
23772 case CODE_FOR_avx_vinsertf128v4df:
23773 case CODE_FOR_avx_vinsertf128v8sf:
23774 case CODE_FOR_avx_vinsertf128v8si:
23775 error ("the last argument must be a 1-bit immediate")&#x3B;
23778 case CODE_FOR_avx_cmpsdv2df3:
23779 case CODE_FOR_avx_cmpssv4sf3:
23780 case CODE_FOR_avx_cmppdv2df3:
23781 case CODE_FOR_avx_cmppsv4sf3:
23782 case CODE_FOR_avx_cmppdv4df3:
23783 case CODE_FOR_avx_cmppsv8sf3:
23784 error ("the last argument must be a 5-bit immediate")&#x3B;
/* Default: 8-bit immediate expected.  */
23788 switch (nargs_constant)
23791 if ((nargs - i) == nargs_constant)
23793 error ("the next to last argument must be an 8-bit immediate")&#x3B;
23797 error ("the last argument must be an 8-bit immediate")&#x3B;
23800 gcc_unreachable ();
23807 if (VECTOR_MODE_P (mode))
23808 op = safe_vector_operand (op, mode);
23810 /* If we aren't optimizing, only allow one memory operand to
23812 if (memory_operand (op, mode))
23815 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23817 if (optimize || !match || num_memory > 1)
23818 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force to a register and view through a subreg.  */
23822 op = copy_to_reg (op);
23823 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23828 args[i].mode = mode;
/* Emit the pattern with the recorded operand count.  */
23834 pat = GEN_FCN (icode) (real_target, args[0].op);
23837 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23840 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23844 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23845 args[2].op, args[3].op);
23848 gcc_unreachable ();
23858 /* Subroutine of ix86_expand_builtin to take care of special insns
23859    with variable number of operands.  */
/* NOTE(review): this file region is a sparse extraction; statements are
   missing between the numbered lines below (braces, some case bodies,
   and the fall-through/break structure are elided).  Comments only.  */
23862 ix86_expand_special_args_builtin (const struct builtin_description *d,
23863 tree exp, rtx target)
23867 unsigned int i, nargs, arg_adjust, memory;
23871 enum machine_mode mode;
23873 enum insn_code icode = d->icode;
23874 bool last_arg_constant = false;
23875 const struct insn_data *insn_p = &insn_data[icode];
23876 enum machine_mode tmode = insn_p->operand[0].mode;
23877 enum { load, store } klass;
/* Classify by the builtin's special-function-type flag.  Each group of
   cases is expected to set nargs, klass and the index of the memory
   operand; the assignments themselves are elided in this extraction.  */
23879 switch ((enum ix86_special_builtin_type) d->flag)
23881 case VOID_FTYPE_VOID:
23882 emit_insn (GEN_FCN (icode) (target));
23884 case V2DI_FTYPE_PV2DI:
23885 case V32QI_FTYPE_PCCHAR:
23886 case V16QI_FTYPE_PCCHAR:
23887 case V8SF_FTYPE_PCV4SF:
23888 case V8SF_FTYPE_PCFLOAT:
23889 case V4SF_FTYPE_PCFLOAT:
23890 case V4DF_FTYPE_PCV2DF:
23891 case V4DF_FTYPE_PCDOUBLE:
23892 case V2DF_FTYPE_PCDOUBLE:
23897 case VOID_FTYPE_PV2SF_V4SF:
23898 case VOID_FTYPE_PV2DI_V2DI:
23899 case VOID_FTYPE_PCHAR_V32QI:
23900 case VOID_FTYPE_PCHAR_V16QI:
23901 case VOID_FTYPE_PFLOAT_V8SF:
23902 case VOID_FTYPE_PFLOAT_V4SF:
23903 case VOID_FTYPE_PDOUBLE_V4DF:
23904 case VOID_FTYPE_PDOUBLE_V2DF:
23905 case VOID_FTYPE_PDI_DI:
23906 case VOID_FTYPE_PINT_INT:
23909 /* Reserve memory operand for target. */
23910 memory = ARRAY_SIZE (args);
23912 case V4SF_FTYPE_V4SF_PCV2SF:
23913 case V2DF_FTYPE_V2DF_PCDOUBLE:
23918 case V8SF_FTYPE_PCV8SF_V8SF:
23919 case V4DF_FTYPE_PCV4DF_V4DF:
23920 case V4SF_FTYPE_PCV4SF_V4SF:
23921 case V2DF_FTYPE_PCV2DF_V2DF:
23926 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23927 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23928 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23929 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23932 /* Reserve memory operand for target. */
23933 memory = ARRAY_SIZE (args);
23936 gcc_unreachable ();
23939 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, operand 0 of the insn is the destination memory; the
   first call argument supplies its address, so TARGET must be unset.  */
23941 if (klass == store)
23943 arg = CALL_EXPR_ARG (exp, 0);
23944 op = expand_normal (arg);
23945 gcc_assert (target == 0);
23946 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23954 || GET_MODE (target) != tmode
23955 || ! (*insn_p->operand[0].predicate) (target, tmode))
23956 target = gen_reg_rtx (tmode);
/* Expand each remaining argument and force it into the shape the
   insn's operand predicate requires (memory vs. register).  */
23959 for (i = 0; i < nargs; i++)
23961 enum machine_mode mode = insn_p->operand[i + 1].mode;
23964 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23965 op = expand_normal (arg);
23966 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23968 if (last_arg_constant && (i + 1) == nargs)
23974 error ("the last argument must be an 8-bit immediate");
23982 /* This must be the memory operand. */
23983 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23984 gcc_assert (GET_MODE (op) == mode
23985 || GET_MODE (op) == VOIDmode);
23989 /* This must be register. */
23990 if (VECTOR_MODE_P (mode))
23991 op = safe_vector_operand (op, mode);
23993 gcc_assert (GET_MODE (op) == mode
23994 || GET_MODE (op) == VOIDmode);
23995 op = copy_to_mode_reg (mode, op);
24000 args[i].mode = mode;
/* Emit the pattern with the right operand count (1, 2 or 3 args).  */
24006 pat = GEN_FCN (icode) (target, args[0].op);
24009 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24012 gcc_unreachable ();
/* Stores produce no value; loads return the (possibly fresh) TARGET.  */
24018 return klass == store ? 0 : target;
24021 /* Return the integer constant in ARG. Constrain it to be in the range
24022 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): sparse extraction — the return type line, the returns
   for the valid/invalid paths and braces are elided here.  */
24025 get_element_number (tree vec_type, tree arg)
24027 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* host_integerp/tree_low_cst with pos=1: ARG must be a non-negative
   host-representable integer constant, and then <= MAX.  */
24029 if (!host_integerp (arg, 1)
24030 || (elt = tree_low_cst (arg, 1), elt > max))
24032 error ("selector must be an integer constant in the range 0..%wi", max);
24039 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24040 ix86_expand_vector_init. We DO have language-level syntax for this, in
24041 the form of (type){ init-list }. Except that since we can't place emms
24042 instructions from inside the compiler, we can't allow the use of MMX
24043 registers unless the user explicitly asks for it. So we do *not* define
24044 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24045 we have builtins invoked by mmintrin.h that gives us license to emit
24046 these sorts of instructions. */
/* NOTE(review): sparse extraction — braces and the final return of
   TARGET are elided below.  Comments only.  */
24049 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24051 enum machine_mode tmode = TYPE_MODE (type);
24052 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24053 int i, n_elt = GET_MODE_NUNITS (tmode);
24054 rtvec v = rtvec_alloc (n_elt);
24056 gcc_assert (VECTOR_MODE_P (tmode));
/* One call argument per vector element, by construction of the builtin.  */
24057 gcc_assert (call_expr_nargs (exp) == n_elt);
24059 for (i = 0; i < n_elt; ++i)
24061 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24062 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24065 if (!target || !register_operand (target, tmode))
24066 target = gen_reg_rtx (tmode);
24068 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24072 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24073 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24074 had a language-level syntax for referencing vector elements. */
/* NOTE(review): sparse extraction — local declarations for arg0/arg1/
   op0/elt, braces and the final return are elided.  Comments only.  */
24077 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24079 enum machine_mode tmode, mode0;
24084 arg0 = CALL_EXPR_ARG (exp, 0);
24085 arg1 = CALL_EXPR_ARG (exp, 1);
24087 op0 = expand_normal (arg0);
/* arg1 is the element selector; get_element_number range-checks it.  */
24088 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = whole-vector mode of the first arg.  */
24090 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24091 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24092 gcc_assert (VECTOR_MODE_P (mode0));
24094 op0 = force_reg (mode0, op0);
24096 if (optimize || !target || !register_operand (target, tmode))
24097 target = gen_reg_rtx (tmode);
24099 ix86_expand_vector_extract (true, target, op0, elt);
24104 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24105 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24106 a language-level syntax for referencing vector elements. */
/* NOTE(review): sparse extraction — the declaration of ELT, braces and
   the final return of TARGET are elided.  Comments only.  */
24109 ix86_expand_vec_set_builtin (tree exp)
24111 enum machine_mode tmode, mode1;
24112 tree arg0, arg1, arg2;
24114 rtx op0, op1, target;
24116 arg0 = CALL_EXPR_ARG (exp, 0);
24117 arg1 = CALL_EXPR_ARG (exp, 1);
24118 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode of arg0, mode1 = its element mode.  */
24120 tmode = TYPE_MODE (TREE_TYPE (arg0));
24121 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24122 gcc_assert (VECTOR_MODE_P (tmode));
24124 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24125 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* arg2 selects which element to replace; range-checked.  */
24126 elt = get_element_number (TREE_TYPE (arg0), arg2);
24128 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24129 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24131 op0 = force_reg (tmode, op0);
24132 op1 = force_reg (mode1, op1);
24134 /* OP0 is the source of these builtin functions and shouldn't be
24135 modified. Create a copy, use it and return it as target. */
24136 target = gen_reg_rtx (tmode);
24137 emit_move_insn (target, op0);
24138 ix86_expand_vector_set (true, target, op1, elt);
24143 /* Expand an expression EXP that calls a built-in function,
24144 with result going to TARGET if that's convenient
24145 (and in mode MODE if that's convenient).
24146 SUBTARGET may be used as the target for computing one of EXP's operands.
24147 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): sparse extraction — the return type, the main switch
   header, many `break`/`return` statements and braces are elided
   between the numbered lines.  Comments only.  */
24150 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24151 enum machine_mode mode ATTRIBUTE_UNUSED,
24152 int ignore ATTRIBUTE_UNUSED)
24154 const struct builtin_description *d;
24156 enum insn_code icode;
24157 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24158 tree arg0, arg1, arg2;
24159 rtx op0, op1, op2, pat;
24160 enum machine_mode mode0, mode1, mode2;
24161 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24163 /* Determine whether the builtin function is available under the current ISA.
24164 Originally the builtin was not created if it wasn't applicable to the
24165 current ISA based on the command line switches. With function specific
24166 options, we need to check in the context of the function making the call
24167 whether it is supported. */
24168 if (ix86_builtins_isa[fcode].isa
24169 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24171 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24172 NULL, NULL, false);
24175 error ("%qE needs unknown isa option", fndecl);
24178 gcc_assert (opts != NULL);
24179 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins needing hand-written expansion come first; anything not
   matched falls through to the description tables at the bottom.  */
24187 case IX86_BUILTIN_MASKMOVQ:
24188 case IX86_BUILTIN_MASKMOVDQU:
24189 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24190 ? CODE_FOR_mmx_maskmovq
24191 : CODE_FOR_sse2_maskmovdqu);
24192 /* Note the arg order is different from the operand order. */
24193 arg1 = CALL_EXPR_ARG (exp, 0);
24194 arg2 = CALL_EXPR_ARG (exp, 1);
24195 arg0 = CALL_EXPR_ARG (exp, 2);
24196 op0 = expand_normal (arg0);
24197 op1 = expand_normal (arg1);
24198 op2 = expand_normal (arg2);
24199 mode0 = insn_data[icode].operand[0].mode;
24200 mode1 = insn_data[icode].operand[1].mode;
24201 mode2 = insn_data[icode].operand[2].mode;
/* The destination is a memory reference through the pointer arg.  */
24203 op0 = force_reg (Pmode, op0);
24204 op0 = gen_rtx_MEM (mode1, op0);
24206 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24207 op0 = copy_to_mode_reg (mode0, op0);
24208 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24209 op1 = copy_to_mode_reg (mode1, op1);
24210 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24211 op2 = copy_to_mode_reg (mode2, op2);
24212 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot since the insn wants memory.  */
24218 case IX86_BUILTIN_LDMXCSR:
24219 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24220 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24221 emit_move_insn (target, op0);
24222 emit_insn (gen_sse_ldmxcsr (target));
24225 case IX86_BUILTIN_STMXCSR:
24226 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24227 emit_insn (gen_sse_stmxcsr (target));
24228 return copy_to_mode_reg (SImode, target);
24230 case IX86_BUILTIN_CLFLUSH:
24231 arg0 = CALL_EXPR_ARG (exp, 0);
24232 op0 = expand_normal (arg0);
24233 icode = CODE_FOR_sse2_clflush;
24234 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24235 op0 = copy_to_mode_reg (Pmode, op0);
24237 emit_insn (gen_sse2_clflush (op0));
24240 case IX86_BUILTIN_MONITOR:
24241 arg0 = CALL_EXPR_ARG (exp, 0);
24242 arg1 = CALL_EXPR_ARG (exp, 1);
24243 arg2 = CALL_EXPR_ARG (exp, 2);
24244 op0 = expand_normal (arg0);
24245 op1 = expand_normal (arg1);
24246 op2 = expand_normal (arg2);
24248 op0 = copy_to_mode_reg (Pmode, op0);
24250 op1 = copy_to_mode_reg (SImode, op1);
24252 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the SI/DI variant for the target.  */
24253 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24256 case IX86_BUILTIN_MWAIT:
24257 arg0 = CALL_EXPR_ARG (exp, 0);
24258 arg1 = CALL_EXPR_ARG (exp, 1);
24259 op0 = expand_normal (arg0);
24260 op1 = expand_normal (arg1);
24262 op0 = copy_to_mode_reg (SImode, op0);
24264 op1 = copy_to_mode_reg (SImode, op1);
24265 emit_insn (gen_sse3_mwait (op0, op1));
24268 case IX86_BUILTIN_VEC_INIT_V2SI:
24269 case IX86_BUILTIN_VEC_INIT_V4HI:
24270 case IX86_BUILTIN_VEC_INIT_V8QI:
24271 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24273 case IX86_BUILTIN_VEC_EXT_V2DF:
24274 case IX86_BUILTIN_VEC_EXT_V2DI:
24275 case IX86_BUILTIN_VEC_EXT_V4SF:
24276 case IX86_BUILTIN_VEC_EXT_V4SI:
24277 case IX86_BUILTIN_VEC_EXT_V8HI:
24278 case IX86_BUILTIN_VEC_EXT_V2SI:
24279 case IX86_BUILTIN_VEC_EXT_V4HI:
24280 case IX86_BUILTIN_VEC_EXT_V16QI:
24281 return ix86_expand_vec_ext_builtin (exp, target);
24283 case IX86_BUILTIN_VEC_SET_V2DI:
24284 case IX86_BUILTIN_VEC_SET_V4SF:
24285 case IX86_BUILTIN_VEC_SET_V4SI:
24286 case IX86_BUILTIN_VEC_SET_V8HI:
24287 case IX86_BUILTIN_VEC_SET_V4HI:
24288 case IX86_BUILTIN_VEC_SET_V16QI:
24289 return ix86_expand_vec_set_builtin (exp);
24291 case IX86_BUILTIN_INFQ:
24293 REAL_VALUE_TYPE inf;
24297 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24299 tmp = validize_mem (force_const_mem (mode, tmp));
24302 target = gen_reg_rtx (mode);
24304 emit_move_insn (target, tmp);
/* Not a special case: search the description tables in order.  */
24312 for (i = 0, d = bdesc_special_args;
24313 i < ARRAY_SIZE (bdesc_special_args);
24315 if (d->code == fcode)
24316 return ix86_expand_special_args_builtin (d, exp, target);
24318 for (i = 0, d = bdesc_args;
24319 i < ARRAY_SIZE (bdesc_args);
24321 if (d->code == fcode)
24324 case IX86_BUILTIN_FABSQ:
24325 case IX86_BUILTIN_COPYSIGNQ:
24327 /* Emit a normal call if SSE2 isn't available. */
24328 return expand_call (exp, target, ignore);
24330 return ix86_expand_args_builtin (d, exp, target);
24333 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24334 if (d->code == fcode)
24335 return ix86_expand_sse_comi (d, exp, target);
24337 for (i = 0, d = bdesc_pcmpestr;
24338 i < ARRAY_SIZE (bdesc_pcmpestr);
24340 if (d->code == fcode)
24341 return ix86_expand_sse_pcmpestr (d, exp, target);
24343 for (i = 0, d = bdesc_pcmpistr;
24344 i < ARRAY_SIZE (bdesc_pcmpistr);
24346 if (d->code == fcode)
24347 return ix86_expand_sse_pcmpistr (d, exp, target);
24349 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24350 if (d->code == fcode)
24351 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24352 (enum multi_arg_type)d->flag,
/* Every valid fcode must have been handled above.  */
24355 gcc_unreachable ();
24358 /* Returns a function decl for a vectorized version of the builtin function
24359 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24360 if it is not available. */
/* NOTE(review): sparse extraction — return type, the `switch (fn)`
   header, breaks and the trailing NULL_TREE return are elided.  */
24363 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24366 enum machine_mode in_mode, out_mode;
/* Both sides must be vector types for a vectorized mapping to exist.  */
24369 if (TREE_CODE (type_out) != VECTOR_TYPE
24370 || TREE_CODE (type_in) != VECTOR_TYPE)
24373 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24374 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24375 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24376 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24380 case BUILT_IN_SQRT:
24381 if (out_mode == DFmode && out_n == 2
24382 && in_mode == DFmode && in_n == 2)
24383 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24386 case BUILT_IN_SQRTF:
24387 if (out_mode == SFmode && out_n == 4
24388 && in_mode == SFmode && in_n == 4)
24389 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24392 case BUILT_IN_LRINT:
24393 if (out_mode == SImode && out_n == 4
24394 && in_mode == DFmode && in_n == 2)
24395 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24398 case BUILT_IN_LRINTF:
24399 if (out_mode == SImode && out_n == 4
24400 && in_mode == SFmode && in_n == 4)
24401 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24408 /* Dispatch to a handler for a vectorization library. */
24409 if (ix86_veclib_handler)
24410 return (*ix86_veclib_handler)(fn, type_out, type_in);
24415 /* Handler for an SVML-style interface to
24416 a library with vectorized intrinsics. */
/* NOTE(review): sparse extraction — the name buffer declaration, the
   uppercasing loop body, n_args counting and the final return of
   NEW_FNDECL are elided between the numbered lines.  */
24419 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24422 tree fntype, new_fndecl, args;
24425 enum machine_mode el_mode, in_mode;
24428 /* The SVML is suitable for unsafe math only. */
24429 if (!flag_unsafe_math_optimizations)
24432 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24433 n = TYPE_VECTOR_SUBPARTS (type_out);
24434 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24435 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24436 if (el_mode != in_mode
/* Double-precision group: vectorizable as 2 x DFmode only.  */
24444 case BUILT_IN_LOG10:
24446 case BUILT_IN_TANH:
24448 case BUILT_IN_ATAN:
24449 case BUILT_IN_ATAN2:
24450 case BUILT_IN_ATANH:
24451 case BUILT_IN_CBRT:
24452 case BUILT_IN_SINH:
24454 case BUILT_IN_ASINH:
24455 case BUILT_IN_ASIN:
24456 case BUILT_IN_COSH:
24458 case BUILT_IN_ACOSH:
24459 case BUILT_IN_ACOS:
24460 if (el_mode != DFmode || n != 2)
/* Single-precision group: vectorizable as 4 x SFmode only.  */
24464 case BUILT_IN_EXPF:
24465 case BUILT_IN_LOGF:
24466 case BUILT_IN_LOG10F:
24467 case BUILT_IN_POWF:
24468 case BUILT_IN_TANHF:
24469 case BUILT_IN_TANF:
24470 case BUILT_IN_ATANF:
24471 case BUILT_IN_ATAN2F:
24472 case BUILT_IN_ATANHF:
24473 case BUILT_IN_CBRTF:
24474 case BUILT_IN_SINHF:
24475 case BUILT_IN_SINF:
24476 case BUILT_IN_ASINHF:
24477 case BUILT_IN_ASINF:
24478 case BUILT_IN_COSHF:
24479 case BUILT_IN_COSF:
24480 case BUILT_IN_ACOSHF:
24481 case BUILT_IN_ACOSF:
24482 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name from the builtin's "__builtin_"
   name (bname+10 skips that prefix); log/logf are irregular.  */
24490 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24492 if (fn == BUILT_IN_LOGF)
24493 strcpy (name, "vmlsLn4");
24494 else if (fn == BUILT_IN_LOG)
24495 strcpy (name, "vmldLn2");
24498 sprintf (name, "vmls%s", bname+10);
24499 name[strlen (name)-1] = '4';
24502 sprintf (name, "vmld%s2", bname+10);
24504 /* Convert to uppercase. */
24508 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24509 args = TREE_CHAIN (args))
24513 fntype = build_function_type_list (type_out, type_in, NULL);
24515 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24517 /* Build a function declaration for the vectorized function. */
24518 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24519 TREE_PUBLIC (new_fndecl) = 1;
24520 DECL_EXTERNAL (new_fndecl) = 1;
24521 DECL_IS_NOVOPS (new_fndecl) = 1;
24522 TREE_READONLY (new_fndecl) = 1;
24527 /* Handler for an ACML-style interface to
24528 a library with vectorized intrinsics. */
/* NOTE(review): sparse extraction — n_args counting, some mode checks
   and the final return of NEW_FNDECL are elided.  */
24531 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template "__vr.._": positions 4-5 get the vector-width/type code,
   position 7 onward gets the math-function name (filled in below).  */
24533 char name[20] = "__vr.._";
24534 tree fntype, new_fndecl, args;
24537 enum machine_mode el_mode, in_mode;
24540 /* The ACML is 64bits only and suitable for unsafe math only as
24541 it does not correctly support parts of IEEE with the required
24542 precision such as denormals. */
24544 || !flag_unsafe_math_optimizations)
24547 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24548 n = TYPE_VECTOR_SUBPARTS (type_out);
24549 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24550 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24551 if (el_mode != in_mode
24561 case BUILT_IN_LOG2:
24562 case BUILT_IN_LOG10:
24565 if (el_mode != DFmode
24570 case BUILT_IN_SINF:
24571 case BUILT_IN_COSF:
24572 case BUILT_IN_EXPF:
24573 case BUILT_IN_POWF:
24574 case BUILT_IN_LOGF:
24575 case BUILT_IN_LOG2F:
24576 case BUILT_IN_LOG10F:
24579 if (el_mode != SFmode
/* bname+10 skips the "__builtin_" prefix of the builtin's name.  */
24588 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24589 sprintf (name + 7, "%s", bname+10);
24592 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24593 args = TREE_CHAIN (args))
24597 fntype = build_function_type_list (type_out, type_in, NULL);
24599 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24601 /* Build a function declaration for the vectorized function. */
24602 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24603 TREE_PUBLIC (new_fndecl) = 1;
24604 DECL_EXTERNAL (new_fndecl) = 1;
24605 DECL_IS_NOVOPS (new_fndecl) = 1;
24606 TREE_READONLY (new_fndecl) = 1;
24612 /* Returns a decl of a function that implements conversion of an integer vector
24613 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24614 side of the conversion.
24615 Return NULL_TREE if it is not available. */
/* NOTE(review): sparse extraction — the `switch (code)` header, the
   FLOAT_EXPR case label and the NULL_TREE fallthroughs are elided.  */
24618 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24620 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction (V4SI operand mode).  */
24626 switch (TYPE_MODE (type))
24629 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction.  */
24634 case FIX_TRUNC_EXPR:
24635 switch (TYPE_MODE (type))
24638 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24648 /* Returns a code for a target-specific builtin that implements
24649 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): sparse extraction — the md_fn branch structure, switch
   headers and NULL_TREE returns are elided.  */
24652 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24653 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip-style fast
   math: finite, non-trapping, unsafe-math, and not optimizing for size.  */
24655 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
24656 && flag_finite_math_only && !flag_trapping_math
24657 && flag_unsafe_math_optimizations))
24661 /* Machine dependent builtins. */
24664 /* Vectorized version of sqrt to rsqrt conversion. */
24665 case IX86_BUILTIN_SQRTPS_NR:
24666 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24672 /* Normal builtins. */
24675 /* Sqrt to rsqrt conversion. */
24676 case BUILT_IN_SQRTF:
24677 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24684 /* Store OPERAND to the memory after reload is completed. This means
24685 that we can't easily use assign_stack_local. */
/* NOTE(review): sparse extraction — the RESULT/OPERANDS declarations,
   the mode switch headers and the final return are elided.  */
24687 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24691 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
24692 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24694 result = gen_rtx_MEM (mode,
24695 gen_rtx_PLUS (Pmode,
24697 GEN_INT (-RED_ZONE_SIZE)));
24698 emit_move_insn (result, operand);
/* No red zone on 64-bit: push through a pre-decrement of %rsp.  */
24700 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24706 operand = gen_lowpart (DImode, operand);
24710 gen_rtx_SET (VOIDmode,
24711 gen_rtx_MEM (DImode,
24712 gen_rtx_PRE_DEC (DImode,
24713 stack_pointer_rtx)),
24717 gcc_unreachable ();
24719 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode values are split and pushed as two SImode words.  */
24728 split_di (&operand, 1, operands, operands + 1);
24730 gen_rtx_SET (VOIDmode,
24731 gen_rtx_MEM (SImode,
24732 gen_rtx_PRE_DEC (Pmode,
24733 stack_pointer_rtx)),
24736 gen_rtx_SET (VOIDmode,
24737 gen_rtx_MEM (SImode,
24738 gen_rtx_PRE_DEC (Pmode,
24739 stack_pointer_rtx)),
24744 /* Store HImodes as SImodes. */
24745 operand = gen_lowpart (SImode, operand);
24749 gen_rtx_SET (VOIDmode,
24750 gen_rtx_MEM (GET_MODE (operand),
24751 gen_rtx_PRE_DEC (SImode,
24752 stack_pointer_rtx)),
24756 gcc_unreachable ();
24758 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24763 /* Free operand from the memory. */
/* NOTE(review): sparse extraction — the `size` declaration/assignments
   and braces are elided; this is the counterpart to
   ix86_force_to_memory above.  */
24765 ix86_free_from_memory (enum machine_mode mode)
/* Nothing to pop when the value was stored into the red zone.  */
24767 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24771 if (mode == DImode || TARGET_64BIT)
24775 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24776 to pop or add instruction if registers are available. */
24777 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24778 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24783 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24784 QImode must go into class Q_REGS.
24785 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24786 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): sparse extraction — the return type line and several
   `return` statements for individual branches are elided.  */
24788 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24790 enum machine_mode mode = GET_MODE (x);
24792 /* We're only allowed to return a subclass of CLASS. Many of the
24793 following checks fail for NO_REGS, so eliminate that early. */
24794 if (regclass == NO_REGS)
24797 /* All classes can load zeros. */
24798 if (x == CONST0_RTX (mode))
24801 /* Force constants into memory if we are loading a (nonzero) constant into
24802 an MMX or SSE register. This is because there are no MMX/SSE instructions
24803 to load from a constant. */
24805 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24808 /* Prefer SSE regs only, if we can use them for math. */
24809 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24810 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24812 /* Floating-point constants need more complex checks. */
24813 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24815 /* General regs can load everything. */
24816 if (reg_class_subset_p (regclass, GENERAL_REGS))
24819 /* Floats can load 0 and 1 plus some others. Note that we eliminated
24820 zero above. We only want to wind up preferring 80387 registers if
24821 we plan on doing computation with them. */
24823 && standard_80387_constant_p (x))
24825 /* Limit class to non-sse. */
24826 if (regclass == FLOAT_SSE_REGS)
24828 if (regclass == FP_TOP_SSE_REGS)
24830 if (regclass == FP_SECOND_SSE_REGS)
24831 return FP_SECOND_REG;
24832 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24839 /* Generally when we see PLUS here, it's the function invariant
24840 (plus soft-fp const_int). Which can only be computed into general
24842 if (GET_CODE (x) == PLUS)
24843 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24845 /* QImode constants are easy to load, but non-constant QImode data
24846 must go into Q_REGS. */
24847 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24849 if (reg_class_subset_p (regclass, Q_REGS))
24851 if (reg_class_subset_p (Q_REGS, regclass))
24859 /* Discourage putting floating-point values in SSE registers unless
24860 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): sparse extraction — the return type line, braces and
   some returns are elided.  */
24862 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24864 enum machine_mode mode = GET_MODE (x);
24866 /* Restrict the output reload class to the register bank that we are doing
24867 math on. If we would like not to return a subset of CLASS, reject this
24868 alternative: if reload cannot do this, it will still use its choice. */
24869 mode = GET_MODE (x);
24870 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24871 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
24873 if (X87_FLOAT_MODE_P (mode))
24875 if (regclass == FP_TOP_SSE_REGS)
24877 else if (regclass == FP_SECOND_SSE_REGS)
24878 return FP_SECOND_REG;
24880 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): sparse extraction — the function comment, several
   returns (including the NO_REGS default) and the declaration of
   `regno` are elided between the numbered lines.  */
24886 static enum reg_class
24887 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24888 enum machine_mode mode,
24889 secondary_reload_info *sri ATTRIBUTE_UNUSED)
24891 /* QImode spills from non-QI registers require
24892 intermediate register on 32bit targets. */
24893 if (!in_p && mode == QImode && !TARGET_64BIT
24894 && (rclass == GENERAL_REGS
24895 || rclass == LEGACY_REGS
24896 || rclass == INDEX_REGS))
/* Resolve hard regno through SUBREGs/pseudos before classifying.  */
24905 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24906 regno = true_regnum (x);
24908 /* Return Q_REGS if the operand is in memory. */
24916 /* If we are copying between general and FP registers, we need a memory
24917 location. The same is true for SSE and MMX registers.
24919 To optimize register_move_cost performance, allow inline variant.
24921 The macro can't work reliably when one of the CLASSES is class containing
24922 registers from multiple units (SSE, MMX, integer). We avoid this by never
24923 combining those units in single alternative in the machine description.
24924 Ensure that this constraint holds to avoid unexpected surprises.
24926 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24927 enforce these sanity checks. */
/* NOTE(review): sparse extraction — the `static inline bool` line,
   braces and the true/false returns are elided.  */
24930 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24931 enum machine_mode mode, int strict)
24933 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24934 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24935 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24936 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24937 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24938 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24940 gcc_assert (!strict);
24944 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
24947 /* ??? This is a lie. We do have moves between mmx/general, and for
24948 mmx/sse2. But by saying we need secondary memory we discourage the
24949 register allocator from using the mmx registers unless needed. */
24950 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
24953 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
24955 /* SSE1 doesn't have any direct moves from other classes. */
24959 /* If the target says that inter-unit moves are more expensive
24960 than moving through memory, then don't generate them. */
24961 if (!TARGET_INTER_UNIT_MOVES)
24964 /* Between SSE and general, we have moves no larger than word size. */
24965 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around the inline predicate above; this is the
   entry point the SECONDARY_MEMORY_NEEDED macro uses.  (Sparse
   extraction: return type line and braces elided.)  */
24973 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24974 enum machine_mode mode, int strict)
24976 return inline_secondary_memory_needed (class1, class2, mode, strict);
24979 /* Return true if the registers in CLASS cannot represent the change from
24980 modes FROM to TO. */
/* NOTE(review): sparse extraction — the return type line, the
   size-equality early-out, and the true/false returns are elided.  */
24983 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
24984 enum reg_class regclass)
24989 /* x87 registers can't do subreg at all, as all values are reformatted
24990 to extended precision. */
24991 if (MAYBE_FLOAT_CLASS_P (regclass))
24994 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
24996 /* Vector registers do not support QI or HImode loads. If we don't
24997 disallow a change to these modes, reload will assume it's ok to
24998 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
24999 the vec_dupv4hi pattern. */
25000 if (GET_MODE_SIZE (from) < 4)
25003 /* Vector registers do not support subreg with nonzero offsets, which
25004 are otherwise valid for integer registers. Since we can't see
25005 whether we have a nonzero offset from here, prohibit all
25006 nonparadoxical subregs changing size. */
25007 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25014 /* Return the cost of moving data of mode M between a
25015 register and memory. A value of 2 is the default; this cost is
25016 relative to those in `REGISTER_MOVE_COST'.
25018 This function is used extensively by register_move_cost that is used to
25019 build tables at startup. Make it inline in this case.
25020 When IN is 2, return maximum of in and out move cost.
25022 If moving between registers and memory is more expensive than
25023 between two registers, you should define this macro to express the
25026 Model also increased moving costs of QImode registers in non
25030 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): sparse extraction — `index`/`cost` declarations, the
   switch headers mapping GET_MODE_SIZE to a cost-table index, and most
   case labels are elided between the numbered lines.  */
/* x87 classes: cost from the fp_load/fp_store tables.  */
25034 if (FLOAT_CLASS_P (regclass))
25052 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25053 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: indexed by operand size.  */
25055 if (SSE_CLASS_P (regclass))
25058 switch (GET_MODE_SIZE (mode))
25073 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25074 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25076 if (MMX_CLASS_P (regclass))
25079 switch (GET_MODE_SIZE (mode))
25091 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25092 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: QImode is special outside Q_REGS on 32-bit, where a
   movzbl load avoids partial-register stalls.  */
25094 switch (GET_MODE_SIZE (mode))
25097 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25100 return ix86_cost->int_store[0];
25101 if (TARGET_PARTIAL_REG_DEPENDENCY
25102 && optimize_function_for_speed_p (cfun))
25103 cost = ix86_cost->movzbl_load;
25105 cost = ix86_cost->int_load[0];
25107 return MAX (cost, ix86_cost->int_store[0]);
25113 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25115 return ix86_cost->movzbl_load;
25117 return ix86_cost->int_store[0] + 4;
25122 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25123 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25125 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25126 if (mode == TFmode)
25129 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25131 cost = ix86_cost->int_load[2];
25133 cost = ix86_cost->int_store[2];
25134 return (cost * (((int) GET_MODE_SIZE (mode)
25135 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* TARGET hook entry point for MEMORY_MOVE_COST: a thin out-of-line
   wrapper that forwards directly to inline_memory_move_cost.  */
25140 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25142 return inline_memory_move_cost (mode, regclass, in);
25146 /* Return the cost of moving data from a register in class CLASS1 to
25147 one in class CLASS2.
25149 It is not required that the cost always equal 2 when FROM is the same as TO;
25150 on some machines it is expensive to move between registers if they are not
25151 general registers. */
25154 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25155 enum reg_class class2)
25157 /* In case we require secondary memory, compute cost of the store followed
25158 by load. In order to avoid bad register allocation choices, we need
25159 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25161 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks inline_memory_move_cost for max(load, store).  */
25165 cost += inline_memory_move_cost (mode, class1, 2);
25166 cost += inline_memory_move_cost (mode, class2, 2);
25168 /* In case of copying from general_purpose_register we may emit multiple
25169 stores followed by single load causing memory size mismatch stall.
25170 Count this as arbitrarily high cost of 20. */
25171 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25174 /* In the case of FP/MMX moves, the registers actually overlap, and we
25175 have to switch modes in order to treat them differently. */
25176 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25177 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25183 /* Moves between SSE/MMX and integer unit are expensive. */
25184 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25185 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25187 /* ??? By keeping returned value relatively high, we limit the number
25188 of moves between integer and MMX/SSE registers for all targets.
25189 Additionally, high value prevents problem with x86_modes_tieable_p(),
25190 where integer modes in MMX/SSE registers are not tieable
25191 because of missing QImode and HImode moves to, from or between
25192 MMX/SSE registers. */
25193 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the per-unit table entry.  */
25195 if (MAYBE_FLOAT_CLASS_P (class1))
25196 return ix86_cost->fp_move;
25197 if (MAYBE_SSE_CLASS_P (class1))
25198 return ix86_cost->sse_move;
25199 if (MAYBE_MMX_CLASS_P (class1))
25200 return ix86_cost->mmx_move;
25204 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25207 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25209 /* Flags and only flags can only hold CCmode values. */
25210 if (CC_REGNO_P (regno))
25211 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/random/partial-int modes are rejected for all other registers
   (the elided line below this test presumably returns 0).  */
25212 if (GET_MODE_CLASS (mode) == MODE_CC
25213 || GET_MODE_CLASS (mode) == MODE_RANDOM
25214 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25216 if (FP_REGNO_P (regno))
25217 return VALID_FP_MODE_P (mode);
25218 if (SSE_REGNO_P (regno))
25220 /* We implement the move patterns for all vector modes into and
25221 out of SSE registers, even when no operation instructions
25222 are available. OImode move is available only when AVX is
25224 return ((TARGET_AVX && mode == OImode)
25225 || VALID_AVX256_REG_MODE (mode)
25226 || VALID_SSE_REG_MODE (mode)
25227 || VALID_SSE2_REG_MODE (mode)
25228 || VALID_MMX_REG_MODE (mode)
25229 || VALID_MMX_REG_MODE_3DNOW (mode));
25231 if (MMX_REGNO_P (regno))
25233 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25234 so if the register is available at all, then we can move data of
25235 the given mode into or out of it. */
25236 return (VALID_MMX_REG_MODE (mode)
25237 || VALID_MMX_REG_MODE_3DNOW (mode));
25240 if (mode == QImode)
25242 /* Take care for QImode values - they can be in non-QI regs,
25243 but then they do cause partial register stalls. */
25244 if (regno < 4 || TARGET_64BIT)
25246 if (!TARGET_PARTIAL_REG_STALL)
/* During reload we must accept QImode in any GPR.  */
25248 return reload_in_progress || reload_completed;
25250 /* We handle both integer and floats in the general purpose registers. */
25251 else if (VALID_INT_MODE_P (mode))
25253 else if (VALID_FP_MODE_P (mode))
25255 else if (VALID_DFP_MODE_P (mode))
25257 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25258 on to use that value in smaller contexts, this can easily force a
25259 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25260 supporting DImode, allow it. */
25261 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25267 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25268 tieable integer mode. */
25271 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* (Elided switch on MODE.)  Narrow modes tie only when partial-register
   stalls are not a concern; wider modes only on 64-bit targets.  */
25280 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25283 return TARGET_64BIT;
25290 /* Return true if MODE1 is accessible in a register that can hold MODE2
25291 without copying. That is, all register classes that can hold MODE2
25292 can also hold MODE1. */
25295 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* Identical modes always tie.  */
25297 if (mode1 == mode2)
25300 if (ix86_tieable_integer_mode_p (mode1)
25301 && ix86_tieable_integer_mode_p (mode2))
25304 /* MODE2 being XFmode implies fp stack or general regs, which means we
25305 can tie any smaller floating point modes to it. Note that we do not
25306 tie this with TFmode. */
25307 if (mode2 == XFmode)
25308 return mode1 == SFmode || mode1 == DFmode;
25310 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25311 that we can tie it with SFmode. */
25312 if (mode2 == DFmode)
25313 return mode1 == SFmode;
25315 /* If MODE2 is only appropriate for an SSE register, then tie with
25316 any other mode acceptable to SSE registers. */
25317 if (GET_MODE_SIZE (mode2) == 16
25318 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25319 return (GET_MODE_SIZE (mode1) == 16
25320 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25322 /* If MODE2 is appropriate for an MMX register, then tie
25323 with any other mode acceptable to MMX registers. */
25324 if (GET_MODE_SIZE (mode2) == 8
25325 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25326 return (GET_MODE_SIZE (mode1) == 8
25327 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25332 /* Compute a (partial) cost for rtx X. Return true if the complete
25333 cost has been computed, and false if subexpressions should be
25334 scanned. In either case, *TOTAL contains the cost result. */
25337 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25339 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25340 enum machine_mode mode = GET_MODE (x);
25341 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25349 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25351 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25353 else if (flag_pic && SYMBOLIC_CONST (x)
25355 || (!GET_CODE (x) != LABEL_REF
25356 && (GET_CODE (x) != SYMBOL_REF
25357 || !SYMBOL_REF_LOCAL_P (x)))))
25364 if (mode == VOIDmode)
25367 switch (standard_80387_constant_p (x))
25372 default: /* Other constants */
25377 /* Start with (MEM (SYMBOL_REF)), since that's where
25378 it'll probably end up. Add a penalty for size. */
25379 *total = (COSTS_N_INSNS (1)
25380 + (flag_pic != 0 && !TARGET_64BIT)
25381 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25387 /* The zero extensions is often completely free on x86_64, so make
25388 it as cheap as possible. */
25389 if (TARGET_64BIT && mode == DImode
25390 && GET_MODE (XEXP (x, 0)) == SImode)
25392 else if (TARGET_ZERO_EXTEND_WITH_AND)
25393 *total = cost->add;
25395 *total = cost->movzx;
25399 *total = cost->movsx;
25403 if (CONST_INT_P (XEXP (x, 1))
25404 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25406 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25409 *total = cost->add;
25412 if ((value == 2 || value == 3)
25413 && cost->lea <= cost->shift_const)
25415 *total = cost->lea;
25425 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25427 if (CONST_INT_P (XEXP (x, 1)))
25429 if (INTVAL (XEXP (x, 1)) > 32)
25430 *total = cost->shift_const + COSTS_N_INSNS (2);
25432 *total = cost->shift_const * 2;
25436 if (GET_CODE (XEXP (x, 1)) == AND)
25437 *total = cost->shift_var * 2;
25439 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25444 if (CONST_INT_P (XEXP (x, 1)))
25445 *total = cost->shift_const;
25447 *total = cost->shift_var;
25452 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25454 /* ??? SSE scalar cost should be used here. */
25455 *total = cost->fmul;
25458 else if (X87_FLOAT_MODE_P (mode))
25460 *total = cost->fmul;
25463 else if (FLOAT_MODE_P (mode))
25465 /* ??? SSE vector cost should be used here. */
25466 *total = cost->fmul;
25471 rtx op0 = XEXP (x, 0);
25472 rtx op1 = XEXP (x, 1);
25474 if (CONST_INT_P (XEXP (x, 1)))
25476 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25477 for (nbits = 0; value != 0; value &= value - 1)
25481 /* This is arbitrary. */
25484 /* Compute costs correctly for widening multiplication. */
25485 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25486 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25487 == GET_MODE_SIZE (mode))
25489 int is_mulwiden = 0;
25490 enum machine_mode inner_mode = GET_MODE (op0);
25492 if (GET_CODE (op0) == GET_CODE (op1))
25493 is_mulwiden = 1, op1 = XEXP (op1, 0);
25494 else if (CONST_INT_P (op1))
25496 if (GET_CODE (op0) == SIGN_EXTEND)
25497 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25500 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25504 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25507 *total = (cost->mult_init[MODE_INDEX (mode)]
25508 + nbits * cost->mult_bit
25509 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25518 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25519 /* ??? SSE cost should be used here. */
25520 *total = cost->fdiv;
25521 else if (X87_FLOAT_MODE_P (mode))
25522 *total = cost->fdiv;
25523 else if (FLOAT_MODE_P (mode))
25524 /* ??? SSE vector cost should be used here. */
25525 *total = cost->fdiv;
25527 *total = cost->divide[MODE_INDEX (mode)];
25531 if (GET_MODE_CLASS (mode) == MODE_INT
25532 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25534 if (GET_CODE (XEXP (x, 0)) == PLUS
25535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25536 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25537 && CONSTANT_P (XEXP (x, 1)))
25539 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25540 if (val == 2 || val == 4 || val == 8)
25542 *total = cost->lea;
25543 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25544 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25545 outer_code, speed);
25546 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25550 else if (GET_CODE (XEXP (x, 0)) == MULT
25551 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25553 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25554 if (val == 2 || val == 4 || val == 8)
25556 *total = cost->lea;
25557 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25558 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25562 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25564 *total = cost->lea;
25565 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25566 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25567 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25574 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25576 /* ??? SSE cost should be used here. */
25577 *total = cost->fadd;
25580 else if (X87_FLOAT_MODE_P (mode))
25582 *total = cost->fadd;
25585 else if (FLOAT_MODE_P (mode))
25587 /* ??? SSE vector cost should be used here. */
25588 *total = cost->fadd;
25596 if (!TARGET_64BIT && mode == DImode)
25598 *total = (cost->add * 2
25599 + (rtx_cost (XEXP (x, 0), outer_code, speed)
25600 << (GET_MODE (XEXP (x, 0)) != DImode))
25601 + (rtx_cost (XEXP (x, 1), outer_code, speed)
25602 << (GET_MODE (XEXP (x, 1)) != DImode)));
25608 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25610 /* ??? SSE cost should be used here. */
25611 *total = cost->fchs;
25614 else if (X87_FLOAT_MODE_P (mode))
25616 *total = cost->fchs;
25619 else if (FLOAT_MODE_P (mode))
25621 /* ??? SSE vector cost should be used here. */
25622 *total = cost->fchs;
25628 if (!TARGET_64BIT && mode == DImode)
25629 *total = cost->add * 2;
25631 *total = cost->add;
25635 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25636 && XEXP (XEXP (x, 0), 1) == const1_rtx
25637 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25638 && XEXP (x, 1) == const0_rtx)
25640 /* This kind of construct is implemented using test[bwl].
25641 Treat it as if we had an AND. */
25642 *total = (cost->add
25643 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25644 + rtx_cost (const1_rtx, outer_code, speed));
25650 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
25655 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25656 /* ??? SSE cost should be used here. */
25657 *total = cost->fabs;
25658 else if (X87_FLOAT_MODE_P (mode))
25659 *total = cost->fabs;
25660 else if (FLOAT_MODE_P (mode))
25661 /* ??? SSE vector cost should be used here. */
25662 *total = cost->fabs;
25666 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25667 /* ??? SSE cost should be used here. */
25668 *total = cost->fsqrt;
25669 else if (X87_FLOAT_MODE_P (mode))
25670 *total = cost->fsqrt;
25671 else if (FLOAT_MODE_P (mode))
25672 /* ??? SSE vector cost should be used here. */
25673 *total = cost->fsqrt;
25677 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
25688 static int current_machopic_label_num;
25690 /* Given a symbol name and its associated stub, write out the
25691 definition of the stub. */
25694 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25696 unsigned int length;
25697 char *binder_name, *symbol_name, lazy_ptr_name[32];
25698 int label = ++current_machopic_label_num;
25700 /* For 64-bit we shouldn't get here. */
25701 gcc_assert (!TARGET_64BIT);
25703 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25704 symb = (*targetm.strip_name_encoding) (symb);
/* Build the "binder" and symbol names on the stack (alloca'd).  */
25706 length = strlen (stub);
25707 binder_name = XALLOCAVEC (char, length + 32);
25708 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25710 length = strlen (symb);
25711 symbol_name = XALLOCAVEC (char, length + 32);
25712 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25714 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Select the PIC or non-PIC stub section (the condition is elided).  */
25717 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25719 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25721 fprintf (file, "%s:\n", stub);
25722 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: materialize PC in %eax, load the lazy pointer, jump.  */
25726 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25727 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25728 fprintf (file, "\tjmp\t*%%edx\n");
25731 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Emit the binder that pushes the lazy pointer and enters dyld.  */
25733 fprintf (file, "%s:\n", binder_name);
25737 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25738 fprintf (file, "\tpushl\t%%eax\n");
25741 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25743 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer slot, initially pointing at the binder.  */
25745 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25746 fprintf (file, "%s:\n", lazy_ptr_name);
25747 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25748 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END for Darwin/x86: delegate to the generic Darwin
   end-of-file handling.  */
25752 darwin_x86_file_end (void)
25754 darwin_file_end ();
25757 #endif /* TARGET_MACHO */
25759 /* Order the registers for register allocator. */
25762 x86_order_regs_for_local_alloc (void)
25767 /* First allocate the local general purpose registers. */
25768 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25769 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25770 reg_alloc_order [pos++] = i;
25772 /* Global general purpose registers. */
25773 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25774 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25775 reg_alloc_order [pos++] = i;
25777 /* x87 registers come first in case we are doing FP math
25779 if (!TARGET_SSE_MATH)
25780 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25781 reg_alloc_order [pos++] = i;
25783 /* SSE registers. */
25784 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25785 reg_alloc_order [pos++] = i;
25786 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25787 reg_alloc_order [pos++] = i;
25789 /* x87 registers. */
/* When SSE math is in use the x87 stack regs are deprioritized.  */
25790 if (TARGET_SSE_MATH)
25791 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25792 reg_alloc_order [pos++] = i;
25794 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25795 reg_alloc_order [pos++] = i;
25797 /* Initialize the rest of array as we do not allocate some registers
25799 while (pos < FIRST_PSEUDO_REGISTER)
25800 reg_alloc_order [pos++] = 0;
25803 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25804 struct attribute_spec.handler. */
25806 ix86_handle_abi_attribute (tree *node, tree name,
25807 tree args ATTRIBUTE_UNUSED,
25808 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only function-ish declarations may carry the attribute.  */
25810 if (TREE_CODE (*node) != FUNCTION_TYPE
25811 && TREE_CODE (*node) != METHOD_TYPE
25812 && TREE_CODE (*node) != FIELD_DECL
25813 && TREE_CODE (*node) != TYPE_DECL)
25815 warning (OPT_Wattributes, "%qs attribute only applies to functions",
25816 IDENTIFIER_POINTER (name));
25817 *no_add_attrs = true;
/* (Elided !TARGET_64BIT check.)  The ABI attributes are 64-bit only.  */
25822 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
25823 IDENTIFIER_POINTER (name));
25824 *no_add_attrs = true;
25828 /* Can combine regparm with all attributes but fastcall. */
25829 if (is_attribute_p ("ms_abi", name))
25831 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25833 error ("ms_abi and sysv_abi attributes are not compatible");
25838 else if (is_attribute_p ("sysv_abi", name))
25840 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25842 error ("ms_abi and sysv_abi attributes are not compatible");
25851 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25852 struct attribute_spec.handler. */
25854 ix86_handle_struct_attribute (tree *node, tree name,
25855 tree args ATTRIBUTE_UNUSED,
25856 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve NODE to the underlying type when given a declaration.  */
25859 if (DECL_P (*node))
25861 if (TREE_CODE (*node) == TYPE_DECL)
25862 type = &TREE_TYPE (*node);
/* Attribute is only meaningful on struct/union types.  */
25867 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25868 || TREE_CODE (*type) == UNION_TYPE)))
25870 warning (OPT_Wattributes, "%qs attribute ignored",
25871 IDENTIFIER_POINTER (name));
25872 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on a given type.  */
25875 else if ((is_attribute_p ("ms_struct", name)
25876 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25877 || ((is_attribute_p ("gcc_struct", name)
25878 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25880 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
25881 IDENTIFIER_POINTER (name));
25882 *no_add_attrs = true;
/* Return true when RECORD_TYPE should use MS-compatible bitfield layout:
   either the target default is MS layout and the type does not opt out
   with "gcc_struct", or the type explicitly requests "ms_struct".  */
25889 ix86_ms_bitfield_layout_p (const_tree record_type)
25891 return (TARGET_MS_BITFIELD_LAYOUT &&
25892 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25893 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25896 /* Returns an expression indicating where the this parameter is
25897 located on entry to the FUNCTION. */
25900 x86_this_parameter (tree function)
25902 tree type = TREE_TYPE (function);
/* AGGR is nonzero when the return value is passed by invisible
   reference, shifting `this' to the second parameter slot.  */
25903 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
25908 const int *parm_regs;
/* 64-bit: `this' arrives in the first (or second, if AGGR) integer
   parameter register of the function's ABI.  */
25910 if (ix86_function_type_abi (type) == MS_ABI)
25911 parm_regs = x86_64_ms_abi_int_parameter_registers;
25913 parm_regs = x86_64_int_parameter_registers;
25914 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit: regparm/fastcall may place `this' in a register.  */
25917 nregs = ix86_function_regparm (type, function);
25919 if (nregs > 0 && !stdarg_p (type))
25923 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
25924 regno = aggr ? DX_REG : CX_REG;
25932 return gen_rtx_MEM (SImode,
25933 plus_constant (stack_pointer_rtx, 4));
25936 return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack, just above the return address.  */
25939 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
25942 /* Determine whether x86_output_mi_thunk can succeed. */
25945 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
25946 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
25947 HOST_WIDE_INT vcall_offset, const_tree function)
25949 /* 64-bit can handle anything. */
25953 /* For 32-bit, everything's fine if we have one free register. */
25954 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
25957 /* Need a free register for vcall_offset. */
25961 /* Need a free register for GOT references. */
25962 if (flag_pic && !(*targetm.binds_local_p) (function))
25965 /* Otherwise ok. */
25969 /* Output the assembler code for a thunk function. THUNK_DECL is the
25970 declaration for the thunk function itself, FUNCTION is the decl for
25971 the target function. DELTA is an immediate constant offset to be
25972 added to THIS. If VCALL_OFFSET is nonzero, the word at
25973 *(*this + vcall_offset) should be added to THIS. */
25976 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
25977 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
25978 HOST_WIDE_INT vcall_offset, tree function)
25981 rtx this_param = x86_this_parameter (function);
25984 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
25985 pull it in now and let DELTA benefit. */
25986 if (REG_P (this_param))
25987 this_reg = this_param;
25988 else if (vcall_offset)
25990 /* Put the this parameter into %eax. */
25991 xops[0] = this_param;
25992 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
25993 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25996 this_reg = NULL_RTX;
25998 /* Adjust the this parameter by a fixed constant. */
26001 xops[0] = GEN_INT (delta);
26002 xops[1] = this_reg ? this_reg : this_param;
/* On 64-bit, DELTA may not fit an immediate: stage it through %r10.  */
26005 if (!x86_64_general_operand (xops[0], DImode))
26007 tmp = gen_rtx_REG (DImode, R10_REG);
26009 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26011 xops[1] = this_param;
26013 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26016 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26019 /* Adjust the this parameter by a value stored in the vtable. */
26023 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch register not used for parameter passing;
   fastcall uses %ecx, so fall back to %eax there.  */
26026 int tmp_regno = CX_REG;
26027 if (lookup_attribute ("fastcall",
26028 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26029 tmp_regno = AX_REG;
26030 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into TMP.  */
26033 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26035 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26037 /* Adjust the this parameter. */
26038 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Large VCALL_OFFSET on 64-bit: form the address via %r11.  */
26039 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26041 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26042 xops[0] = GEN_INT (vcall_offset);
26044 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26045 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26047 xops[1] = this_reg;
26048 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26051 /* If necessary, drop THIS back to its stack slot. */
26052 if (this_reg && this_reg != this_param)
26054 xops[0] = this_reg;
26055 xops[1] = this_param;
26056 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real target function.  */
26059 xops[0] = XEXP (DECL_RTL (function), 0);
26062 if (!flag_pic || (*targetm.binds_local_p) (function))
26063 output_asm_insn ("jmp\t%P0", xops);
26064 /* All thunks should be in the same object as their target,
26065 and thus binds_local_p should be true. */
26066 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26067 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
26070 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26071 tmp = gen_rtx_CONST (Pmode, tmp);
26072 tmp = gen_rtx_MEM (QImode, tmp);
26074 output_asm_insn ("jmp\t%A0", xops);
26079 if (!flag_pic || (*targetm.binds_local_p) (function))
26080 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the Mach-O symbol stub.  */
26085 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26086 tmp = (gen_rtx_SYMBOL_REF
26088 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26089 tmp = gen_rtx_MEM (QImode, tmp);
26091 output_asm_insn ("jmp\t%0", xops);
26094 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT pointer in %ecx and jump via GOT.  */
26096 tmp = gen_rtx_REG (SImode, CX_REG);
26097 output_set_got (tmp, NULL_RTX);
26100 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26101 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START: emit target-specific directives at the top of
   the assembly output (Darwin prologue, .version, __fltused, and the
   Intel-syntax switch when requested).  */
26107 x86_file_start (void)
26109 default_file_start ();
26111 darwin_file_start ();
26113 if (X86_FILE_START_VERSION_DIRECTIVE)
26114 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26115 if (X86_FILE_START_FLTUSED)
26116 fputs ("\t.global\t__fltused\n", asm_out_file);
26117 if (ix86_asm_dialect == ASM_INTEL)
26118 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of double/integer fields
   at 32 bits on 32-bit targets without -malign-double, matching the
   traditional ix86 struct layout.  */
26122 x86_field_alignment (tree field, int computed)
26124 enum machine_mode mode;
26125 tree type = TREE_TYPE (field);
26127 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26129 mode = TYPE_MODE (strip_array_types (type));
26130 if (mode == DFmode || mode == DCmode
26131 || GET_MODE_CLASS (mode) == MODE_INT
26132 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26133 return MIN (32, computed);
26137 /* Output assembler code to FILE to increment profiler label # LABELNO
26138 for profiling a function entry. */
26140 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit: counter address goes in %r11; call mcount directly or via
   GOT under PIC on SysV.  */
26144 #ifndef NO_PROFILE_COUNTERS
26145 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26148 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26149 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26151 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: address the counter and mcount through the GOT.  */
26155 #ifndef NO_PROFILE_COUNTERS
26156 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26157 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26159 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address and direct call.  */
26163 #ifndef NO_PROFILE_COUNTERS
26164 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26165 PROFILE_COUNT_REGISTER);
26167 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26171 /* We don't have exact information about the insn sizes, but we may assume
26172 quite safely that we are informed about all 1 byte insns and memory
26173 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on the byte size of INSN.  */
26177 min_insn_size (rtx insn)
26181 if (!INSN_P (insn) || !active_insn_p (insn))
26184 /* Discard alignments we've emit and jump instructions. */
26185 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26186 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26189 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26190 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
26193 /* Important case - calls are always 5 bytes.
26194 It is common to have many calls in the row. */
26196 && symbolic_reference_mentioned_p (PATTERN (insn))
26197 && !SIBLING_CALL_P (insn))
26199 if (get_attr_length (insn) <= 1)
26202 /* For normal instructions we may rely on the sizes of addresses
26203 and the presence of symbol to require 4 bytes of encoding.
26204 This is not the case for jumps where references are PC relative. */
26205 if (!JUMP_P (insn))
26207 l = get_attr_length_address (insn);
26208 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26217 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
26221 ix86_avoid_jump_misspredicts (void)
26223 rtx insn, start = get_insns ();
26224 int nbytes = 0, njumps = 0;
26227 /* Look for all minimal intervals of instructions containing 4 jumps.
26228 The intervals are bounded by START and INSN. NBYTES is the total
26229 size of instructions in the interval including INSN and not including
26230 START. When the NBYTES is smaller than 16 bytes, it is possible
26231 that the end of START and INSN ends up in the same 16byte page.
26233 The smallest offset in the page INSN can start is the case where START
26234 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26235 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26237 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
/* Accumulate a conservative size estimate for the window.  */
26240 nbytes += min_insn_size (insn);
26242 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26243 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps; jump tables (ADDR_VEC) are excluded.  */
26245 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26246 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Slide START forward until at most 3 jumps remain in the window.  */
26254 start = NEXT_INSN (start);
26255 if ((JUMP_P (start)
26256 && GET_CODE (PATTERN (start)) != ADDR_VEC
26257 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26259 njumps--, isjump = 1;
26262 nbytes -= min_insn_size (start);
26264 gcc_assert (njumps >= 0);
26266 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26267 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps could share one 16-byte page: pad before INSN so that the
   fourth jump lands in a fresh 16-byte window.  */
26269 if (njumps == 3 && isjump && nbytes < 16)
26271 int padsize = 15 - nbytes + min_insn_size (insn);
26274 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26275 INSN_UID (insn), padsize);
26276 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26281 /* AMD Athlon works faster
26282 when RET is not destination of conditional jump or directly preceded
26283 by other jump instruction. We avoid the penalty by inserting NOP just
26284 before the RET instructions in such cases. */
26286 ix86_pad_returns (void)
/* Inspect every edge into the exit block: each source block may end
   in a RETURN pattern that needs padding.  */
26291 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26293 basic_block bb = e->src;
26294 rtx ret = BB_END (bb);
26296 bool replace = false;
26298 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26299 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
26301 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26302 if (active_insn_p (prev) || LABEL_P (prev))
/* Return reached via a label: pad if any predecessor edge is a
   taken (non-fallthru) branch.  */
26304 if (prev && LABEL_P (prev))
26309 FOR_EACH_EDGE (e, ei, bb->preds)
26310 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26311 && !(e->flags & EDGE_FALLTHRU))
/* Return directly preceded by a conditional jump also penalized.  */
26316 prev = prev_active_insn (ret);
26318 && ((JUMP_P (prev) && any_condjump_p (prev))
26321 /* Empty functions get branch mispredict even when the jump destination
26322 is not visible to us. */
26323 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain return with the long (padded) form.  */
26328 emit_insn_before (gen_return_internal_long (), ret);
26334 /* Implement machine specific optimizations. We implement padding of returns
26335 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* (Function header elided.)  Both passes run only when optimizing
   the current function for speed.  */
26339 if (TARGET_PAD_RETURNS && optimize
26340 && optimize_function_for_speed_p (cfun))
26341 ix86_pad_returns ();
26342 if (TARGET_FOUR_JUMP_LIMIT && optimize
26343 && optimize_function_for_speed_p (cfun))
26344 ix86_avoid_jump_misspredicts ();
26347 /* Return nonzero when QImode register that must be represented via REX prefix
26350 x86_extended_QIreg_mentioned_p (rtx insn)
/* Scan the cached operand list for a QImode register above the four
   legacy byte-addressable registers (al/bl/cl/dl).  */
26353 extract_insn_cached (insn);
26354 for (i = 0; i < recog_data.n_operands; i++)
26355 if (REG_P (recog_data.operand[i])
26356 && REGNO (recog_data.operand[i]) >= 4)
26361 /* Return nonzero when P points to register encoded via REX prefix.
26362 Called via for_each_rtx. */
26364 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26366 unsigned int regno;
/* (REG_P test elided.)  REX-only registers are r8-r15 / xmm8-xmm15.  */
26369 regno = REGNO (*p);
26370 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26373 /* Return true when INSN mentions register that must be encoded using REX
26376 x86_extended_reg_mentioned_p (rtx insn)
/* Walk the pattern (or the rtx itself when not an insn) with the
   per-subrtx predicate above.  */
26378 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26379 extended_reg_mentioned_1, NULL);
26382 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26383 optabs would emit if we didn't have TFmode patterns. */
26386 x86_emit_floatuns (rtx operands[2])
26388 rtx neglab, donelab, i0, i1, f0, in, out;
26389 enum machine_mode mode, inmode;
26391 inmode = GET_MODE (operands[1]);
26392 gcc_assert (inmode == SImode || inmode == DImode);
26395 in = force_reg (inmode, operands[1]);
26396 mode = GET_MODE (out);
26397 neglab = gen_label_rtx ();
26398 donelab = gen_label_rtx ();
26399 f0 = gen_reg_rtx (mode);
/* Non-negative input: a plain signed conversion is correct.  */
26401 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26403 expand_float (out, in, 0);
26405 emit_jump_insn (gen_jump (donelab));
/* Negative (high-bit-set) input: halve it preserving the low bit
   ((in >> 1) | (in & 1)), convert, then double the result.  */
26408 emit_label (neglab);
26410 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26412 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26414 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26416 expand_float (f0, i0, 0);
26418 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26420 emit_label (donelab);
26423 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26424 with all elements equal to VAR. Return true if successful. */
26427 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26428 rtx target, rtx val)
26430 enum machine_mode hmode, smode, wsmode, wvmode;
26445 val = force_reg (GET_MODE_INNER (mode), val);
26446 x = gen_rtx_VEC_DUPLICATE (mode, val);
26447 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26453 if (TARGET_SSE || TARGET_3DNOW_A)
26455 val = gen_lowpart (SImode, val);
26456 x = gen_rtx_TRUNCATE (HImode, val);
26457 x = gen_rtx_VEC_DUPLICATE (mode, x);
26458 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26480 /* Extend HImode to SImode using a paradoxical SUBREG. */
26481 tmp1 = gen_reg_rtx (SImode);
26482 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26483 /* Insert the SImode value as low element of V4SImode vector. */
26484 tmp2 = gen_reg_rtx (V4SImode);
26485 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26486 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26487 CONST0_RTX (V4SImode),
26489 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26490 /* Cast the V4SImode vector back to a V8HImode vector. */
26491 tmp1 = gen_reg_rtx (V8HImode);
26492 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26493 /* Duplicate the low short through the whole low SImode word. */
26494 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26495 /* Cast the V8HImode vector back to a V4SImode vector. */
26496 tmp2 = gen_reg_rtx (V4SImode);
26497 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26498 /* Replicate the low element of the V4SImode vector. */
26499 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26500 /* Cast the V2SImode back to V8HImode, and store in target. */
26501 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
26512 /* Extend QImode to SImode using a paradoxical SUBREG. */
26513 tmp1 = gen_reg_rtx (SImode);
26514 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26515 /* Insert the SImode value as low element of V4SImode vector. */
26516 tmp2 = gen_reg_rtx (V4SImode);
26517 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26518 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26519 CONST0_RTX (V4SImode),
26521 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26522 /* Cast the V4SImode vector back to a V16QImode vector. */
26523 tmp1 = gen_reg_rtx (V16QImode);
26524 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26525 /* Duplicate the low byte through the whole low SImode word. */
26526 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26527 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26528 /* Cast the V16QImode vector back to a V4SImode vector. */
26529 tmp2 = gen_reg_rtx (V4SImode);
26530 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26531 /* Replicate the low element of the V4SImode vector. */
26532 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26533 /* Cast the V2SImode back to V16QImode, and store in target. */
26534 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
26542 /* Replicate the value once into the next wider mode and recurse. */
26543 val = convert_modes (wsmode, smode, val, true);
26544 x = expand_simple_binop (wsmode, ASHIFT, val,
26545 GEN_INT (GET_MODE_BITSIZE (smode)),
26546 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26547 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26549 x = gen_reg_rtx (wvmode);
26550 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26551 gcc_unreachable ();
26552 emit_move_insn (target, gen_lowpart (mode, x));
26575 rtx tmp = gen_reg_rtx (hmode);
26576 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26577 emit_insn (gen_rtx_SET (VOIDmode, target,
26578 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26587 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26588 whose ONE_VAR element is VAR, and other elements are zero. Return true
26592 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26593 rtx target, rtx var, int one_var)
26595 enum machine_mode vsimode;
26598 bool use_vector_set = false;
26603 /* For SSE4.1, we normally use vector set. But if the second
26604 element is zero and inter-unit moves are OK, we use movq
26606 use_vector_set = (TARGET_64BIT
26608 && !(TARGET_INTER_UNIT_MOVES
26614 use_vector_set = TARGET_SSE4_1;
26617 use_vector_set = TARGET_SSE2;
26620 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26628 use_vector_set = TARGET_AVX;
26634 if (use_vector_set)
26636 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26637 var = force_reg (GET_MODE_INNER (mode), var);
26638 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26654 var = force_reg (GET_MODE_INNER (mode), var);
26655 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26656 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26661 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26662 new_target = gen_reg_rtx (mode);
26664 new_target = target;
26665 var = force_reg (GET_MODE_INNER (mode), var);
26666 x = gen_rtx_VEC_DUPLICATE (mode, var);
26667 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26668 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26671 /* We need to shuffle the value to the correct position, so
26672 create a new pseudo to store the intermediate result. */
26674 /* With SSE2, we can use the integer shuffle insns. */
26675 if (mode != V4SFmode && TARGET_SSE2)
26677 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26679 GEN_INT (one_var == 1 ? 0 : 1),
26680 GEN_INT (one_var == 2 ? 0 : 1),
26681 GEN_INT (one_var == 3 ? 0 : 1)));
26682 if (target != new_target)
26683 emit_move_insn (target, new_target);
26687 /* Otherwise convert the intermediate result to V4SFmode and
26688 use the SSE1 shuffle instructions. */
26689 if (mode != V4SFmode)
26691 tmp = gen_reg_rtx (V4SFmode);
26692 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26697 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26699 GEN_INT (one_var == 1 ? 0 : 1),
26700 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26701 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26703 if (mode != V4SFmode)
26704 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26705 else if (tmp != target)
26706 emit_move_insn (target, tmp);
26708 else if (target != new_target)
26709 emit_move_insn (target, new_target);
26714 vsimode = V4SImode;
26720 vsimode = V2SImode;
26726 /* Zero extend the variable element to SImode and recurse. */
26727 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26729 x = gen_reg_rtx (vsimode);
26730 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26732 gcc_unreachable ();
26734 emit_move_insn (target, gen_lowpart (mode, x));
26742 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26743 consisting of the values in VALS. It is known that all elements
26744 except ONE_VAR are constants. Return true if successful. */
26747 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26748 rtx target, rtx vals, int one_var)
26750 rtx var = XVECEXP (vals, 0, one_var);
26751 enum machine_mode wmode;
26754 const_vec = copy_rtx (vals);
26755 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26756 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26764 /* For the two element vectors, it's just as easy to use
26765 the general case. */
26789 /* There's no way to set one QImode entry easily. Combine
26790 the variable value with its adjacent constant value, and
26791 promote to an HImode set. */
26792 x = XVECEXP (vals, 0, one_var ^ 1);
26795 var = convert_modes (HImode, QImode, var, true);
26796 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26797 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26798 x = GEN_INT (INTVAL (x) & 0xff);
26802 var = convert_modes (HImode, QImode, var, true);
26803 x = gen_int_mode (INTVAL (x) << 8, HImode);
26805 if (x != const0_rtx)
26806 var = expand_simple_binop (HImode, IOR, var, x, var,
26807 1, OPTAB_LIB_WIDEN);
26809 x = gen_reg_rtx (wmode);
26810 emit_move_insn (x, gen_lowpart (wmode, const_vec));
26811 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26813 emit_move_insn (target, gen_lowpart (mode, x));
26820 emit_move_insn (target, const_vec);
26821 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26825 /* A subroutine of ix86_expand_vector_init_general. Use vector
26826 concatenate to handle the most general case: all values variable,
26827 and none identical. */
26830 ix86_expand_vector_init_concat (enum machine_mode mode,
26831 rtx target, rtx *ops, int n)
26833 enum machine_mode cmode, hmode = VOIDmode;
26834 rtx first[8], second[4];
26874 gcc_unreachable ();
26877 if (!register_operand (ops[1], cmode))
26878 ops[1] = force_reg (cmode, ops[1]);
26879 if (!register_operand (ops[0], cmode))
26880 ops[0] = force_reg (cmode, ops[0]);
26881 emit_insn (gen_rtx_SET (VOIDmode, target,
26882 gen_rtx_VEC_CONCAT (mode, ops[0],
26902 gcc_unreachable ();
26918 gcc_unreachable ();
26923 /* FIXME: We process inputs backward to help RA. PR 36222. */
26926 for (; i > 0; i -= 2, j--)
26928 first[j] = gen_reg_rtx (cmode);
26929 v = gen_rtvec (2, ops[i - 1], ops[i]);
26930 ix86_expand_vector_init (false, first[j],
26931 gen_rtx_PARALLEL (cmode, v));
26937 gcc_assert (hmode != VOIDmode);
26938 for (i = j = 0; i < n; i += 2, j++)
26940 second[j] = gen_reg_rtx (hmode);
26941 ix86_expand_vector_init_concat (hmode, second [j],
26945 ix86_expand_vector_init_concat (mode, target, second, n);
26948 ix86_expand_vector_init_concat (mode, target, first, n);
26952 gcc_unreachable ();
26956 /* A subroutine of ix86_expand_vector_init_general. Use vector
26957 interleave to handle the most general case: all values variable,
26958 and none identical. */
26961 ix86_expand_vector_init_interleave (enum machine_mode mode,
26962 rtx target, rtx *ops, int n)
26964 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
26967 rtx (*gen_load_even) (rtx, rtx, rtx);
26968 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
26969 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
26974 gen_load_even = gen_vec_setv8hi;
26975 gen_interleave_first_low = gen_vec_interleave_lowv4si;
26976 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26977 inner_mode = HImode;
26978 first_imode = V4SImode;
26979 second_imode = V2DImode;
26980 third_imode = VOIDmode;
26983 gen_load_even = gen_vec_setv16qi;
26984 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
26985 gen_interleave_second_low = gen_vec_interleave_lowv4si;
26986 inner_mode = QImode;
26987 first_imode = V8HImode;
26988 second_imode = V4SImode;
26989 third_imode = V2DImode;
26992 gcc_unreachable ();
26995 for (i = 0; i < n; i++)
26997 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
26998 op0 = gen_reg_rtx (SImode);
26999 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27001 /* Insert the SImode value as low element of V4SImode vector. */
27002 op1 = gen_reg_rtx (V4SImode);
27003 op0 = gen_rtx_VEC_MERGE (V4SImode,
27004 gen_rtx_VEC_DUPLICATE (V4SImode,
27006 CONST0_RTX (V4SImode),
27008 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27010 /* Cast the V4SImode vector back to a vector in orignal mode. */
27011 op0 = gen_reg_rtx (mode);
27012 emit_move_insn (op0, gen_lowpart (mode, op1));
27014 /* Load even elements into the second positon. */
27015 emit_insn ((*gen_load_even) (op0,
27016 force_reg (inner_mode,
27020 /* Cast vector to FIRST_IMODE vector. */
27021 ops[i] = gen_reg_rtx (first_imode);
27022 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27025 /* Interleave low FIRST_IMODE vectors. */
27026 for (i = j = 0; i < n; i += 2, j++)
27028 op0 = gen_reg_rtx (first_imode);
27029 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27031 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27032 ops[j] = gen_reg_rtx (second_imode);
27033 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27036 /* Interleave low SECOND_IMODE vectors. */
27037 switch (second_imode)
27040 for (i = j = 0; i < n / 2; i += 2, j++)
27042 op0 = gen_reg_rtx (second_imode);
27043 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27046 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27048 ops[j] = gen_reg_rtx (third_imode);
27049 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27051 second_imode = V2DImode;
27052 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27056 op0 = gen_reg_rtx (second_imode);
27057 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27060 /* Cast the SECOND_IMODE vector back to a vector on original
27062 emit_insn (gen_rtx_SET (VOIDmode, target,
27063 gen_lowpart (mode, op0)));
27067 gcc_unreachable ();
27071 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27072 all values variable, and none identical. */
27075 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27076 rtx target, rtx vals)
27078 rtx ops[32], op0, op1;
27079 enum machine_mode half_mode = VOIDmode;
27086 if (!mmx_ok && !TARGET_SSE)
27098 n = GET_MODE_NUNITS (mode);
27099 for (i = 0; i < n; i++)
27100 ops[i] = XVECEXP (vals, 0, i);
27101 ix86_expand_vector_init_concat (mode, target, ops, n);
27105 half_mode = V16QImode;
27109 half_mode = V8HImode;
27113 n = GET_MODE_NUNITS (mode);
27114 for (i = 0; i < n; i++)
27115 ops[i] = XVECEXP (vals, 0, i);
27116 op0 = gen_reg_rtx (half_mode);
27117 op1 = gen_reg_rtx (half_mode);
27118 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27120 ix86_expand_vector_init_interleave (half_mode, op1,
27121 &ops [n >> 1], n >> 2);
27122 emit_insn (gen_rtx_SET (VOIDmode, target,
27123 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27127 if (!TARGET_SSE4_1)
27135 /* Don't use ix86_expand_vector_init_interleave if we can't
27136 move from GPR to SSE register directly. */
27137 if (!TARGET_INTER_UNIT_MOVES)
27140 n = GET_MODE_NUNITS (mode);
27141 for (i = 0; i < n; i++)
27142 ops[i] = XVECEXP (vals, 0, i);
27143 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27151 gcc_unreachable ();
27155 int i, j, n_elts, n_words, n_elt_per_word;
27156 enum machine_mode inner_mode;
27157 rtx words[4], shift;
27159 inner_mode = GET_MODE_INNER (mode);
27160 n_elts = GET_MODE_NUNITS (mode);
27161 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27162 n_elt_per_word = n_elts / n_words;
27163 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27165 for (i = 0; i < n_words; ++i)
27167 rtx word = NULL_RTX;
27169 for (j = 0; j < n_elt_per_word; ++j)
27171 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27172 elt = convert_modes (word_mode, inner_mode, elt, true);
27178 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27179 word, 1, OPTAB_LIB_WIDEN);
27180 word = expand_simple_binop (word_mode, IOR, word, elt,
27181 word, 1, OPTAB_LIB_WIDEN);
27189 emit_move_insn (target, gen_lowpart (mode, words[0]));
27190 else if (n_words == 2)
27192 rtx tmp = gen_reg_rtx (mode);
27193 emit_clobber (tmp);
27194 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27195 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27196 emit_move_insn (target, tmp);
27198 else if (n_words == 4)
27200 rtx tmp = gen_reg_rtx (V4SImode);
27201 gcc_assert (word_mode == SImode);
27202 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27203 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27204 emit_move_insn (target, gen_lowpart (mode, tmp));
27207 gcc_unreachable ();
27211 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27212 instructions unless MMX_OK is true. */
27215 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27217 enum machine_mode mode = GET_MODE (target);
27218 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27219 int n_elts = GET_MODE_NUNITS (mode);
27220 int n_var = 0, one_var = -1;
27221 bool all_same = true, all_const_zero = true;
27225 for (i = 0; i < n_elts; ++i)
27227 x = XVECEXP (vals, 0, i);
27228 if (!(CONST_INT_P (x)
27229 || GET_CODE (x) == CONST_DOUBLE
27230 || GET_CODE (x) == CONST_FIXED))
27231 n_var++, one_var = i;
27232 else if (x != CONST0_RTX (inner_mode))
27233 all_const_zero = false;
27234 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27238 /* Constants are best loaded from the constant pool. */
27241 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27245 /* If all values are identical, broadcast the value. */
27247 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27248 XVECEXP (vals, 0, 0)))
27251 /* Values where only one field is non-constant are best loaded from
27252 the pool and overwritten via move later. */
27256 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27257 XVECEXP (vals, 0, one_var),
27261 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27265 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27269 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27271 enum machine_mode mode = GET_MODE (target);
27272 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27273 enum machine_mode half_mode;
27274 bool use_vec_merge = false;
27276 static rtx (*gen_extract[6][2]) (rtx, rtx)
27278 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27279 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27280 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27281 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27282 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27283 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27285 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27287 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27288 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27289 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27290 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27291 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27292 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27302 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27303 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27305 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27307 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27308 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27314 use_vec_merge = TARGET_SSE4_1;
27322 /* For the two element vectors, we implement a VEC_CONCAT with
27323 the extraction of the other element. */
27325 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27326 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27329 op0 = val, op1 = tmp;
27331 op0 = tmp, op1 = val;
27333 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27334 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27339 use_vec_merge = TARGET_SSE4_1;
27346 use_vec_merge = true;
27350 /* tmp = target = A B C D */
27351 tmp = copy_to_reg (target);
27352 /* target = A A B B */
27353 emit_insn (gen_sse_unpcklps (target, target, target));
27354 /* target = X A B B */
27355 ix86_expand_vector_set (false, target, val, 0);
27356 /* target = A X C D */
27357 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27358 GEN_INT (1), GEN_INT (0),
27359 GEN_INT (2+4), GEN_INT (3+4)));
27363 /* tmp = target = A B C D */
27364 tmp = copy_to_reg (target);
27365 /* tmp = X B C D */
27366 ix86_expand_vector_set (false, tmp, val, 0);
27367 /* target = A B X D */
27368 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27369 GEN_INT (0), GEN_INT (1),
27370 GEN_INT (0+4), GEN_INT (3+4)));
27374 /* tmp = target = A B C D */
27375 tmp = copy_to_reg (target);
27376 /* tmp = X B C D */
27377 ix86_expand_vector_set (false, tmp, val, 0);
27378 /* target = A B X D */
27379 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27380 GEN_INT (0), GEN_INT (1),
27381 GEN_INT (2+4), GEN_INT (0+4)));
27385 gcc_unreachable ();
27390 use_vec_merge = TARGET_SSE4_1;
27394 /* Element 0 handled by vec_merge below. */
27397 use_vec_merge = true;
27403 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27404 store into element 0, then shuffle them back. */
27408 order[0] = GEN_INT (elt);
27409 order[1] = const1_rtx;
27410 order[2] = const2_rtx;
27411 order[3] = GEN_INT (3);
27412 order[elt] = const0_rtx;
27414 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27415 order[1], order[2], order[3]));
27417 ix86_expand_vector_set (false, target, val, 0);
27419 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27420 order[1], order[2], order[3]));
27424 /* For SSE1, we have to reuse the V4SF code. */
27425 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27426 gen_lowpart (SFmode, val), elt);
27431 use_vec_merge = TARGET_SSE2;
27434 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27438 use_vec_merge = TARGET_SSE4_1;
27445 half_mode = V16QImode;
27451 half_mode = V8HImode;
27457 half_mode = V4SImode;
27463 half_mode = V2DImode;
27469 half_mode = V4SFmode;
27475 half_mode = V2DFmode;
27481 /* Compute offset. */
27485 gcc_assert (i <= 1);
27487 /* Extract the half. */
27488 tmp = gen_reg_rtx (half_mode);
27489 emit_insn ((*gen_extract[j][i]) (tmp, target));
27491 /* Put val in tmp at elt. */
27492 ix86_expand_vector_set (false, tmp, val, elt);
27495 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27504 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27505 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27506 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27510 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27512 emit_move_insn (mem, target);
27514 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27515 emit_move_insn (tmp, val);
27517 emit_move_insn (target, mem);
27522 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27524 enum machine_mode mode = GET_MODE (vec);
27525 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27526 bool use_vec_extr = false;
27539 use_vec_extr = true;
27543 use_vec_extr = TARGET_SSE4_1;
27555 tmp = gen_reg_rtx (mode);
27556 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27557 GEN_INT (elt), GEN_INT (elt),
27558 GEN_INT (elt+4), GEN_INT (elt+4)));
27562 tmp = gen_reg_rtx (mode);
27563 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27567 gcc_unreachable ();
27570 use_vec_extr = true;
27575 use_vec_extr = TARGET_SSE4_1;
27589 tmp = gen_reg_rtx (mode);
27590 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27591 GEN_INT (elt), GEN_INT (elt),
27592 GEN_INT (elt), GEN_INT (elt)));
27596 tmp = gen_reg_rtx (mode);
27597 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
27601 gcc_unreachable ();
27604 use_vec_extr = true;
27609 /* For SSE1, we have to reuse the V4SF code. */
27610 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27611 gen_lowpart (V4SFmode, vec), elt);
27617 use_vec_extr = TARGET_SSE2;
27620 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27624 use_vec_extr = TARGET_SSE4_1;
27628 /* ??? Could extract the appropriate HImode element and shift. */
27635 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27636 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27638 /* Let the rtl optimizers know about the zero extension performed. */
27639 if (inner_mode == QImode || inner_mode == HImode)
27641 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27642 target = gen_lowpart (SImode, target);
27645 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27649 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27651 emit_move_insn (mem, vec);
27653 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27654 emit_move_insn (target, tmp);
27658 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
27659 pattern to reduce; DEST is the destination; IN is the input vector. */
27662 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27664 rtx tmp1, tmp2, tmp3;
27666 tmp1 = gen_reg_rtx (V4SFmode);
27667 tmp2 = gen_reg_rtx (V4SFmode);
27668 tmp3 = gen_reg_rtx (V4SFmode);
27670 emit_insn (gen_sse_movhlps (tmp1, in, in));
27671 emit_insn (fn (tmp2, tmp1, in));
27673 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27674 GEN_INT (1), GEN_INT (1),
27675 GEN_INT (1+4), GEN_INT (1+4)));
27676 emit_insn (fn (dest, tmp2, tmp3));
27679 /* Target hook for scalar_mode_supported_p. */
27681 ix86_scalar_mode_supported_p (enum machine_mode mode)
27683 if (DECIMAL_FLOAT_MODE_P (mode))
27685 else if (mode == TFmode)
27688 return default_scalar_mode_supported_p (mode);
27691 /* Implements target hook vector_mode_supported_p. */
27693 ix86_vector_mode_supported_p (enum machine_mode mode)
27695 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27697 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27699 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27701 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27703 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27708 /* Target hook for c_mode_for_suffix. */
27709 static enum machine_mode
27710 ix86_c_mode_for_suffix (char suffix)
27720 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27722 We do this in the new i386 backend to maintain source compatibility
27723 with the old cc0-based compiler. */
27726 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27727 tree inputs ATTRIBUTE_UNUSED,
27730 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27732 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27737 /* Implements target vector targetm.asm.encode_section_info. This
27738 is not used by netware. */
27740 static void ATTRIBUTE_UNUSED
27741 ix86_encode_section_info (tree decl, rtx rtl, int first)
27743 default_encode_section_info (decl, rtl, first);
27745 if (TREE_CODE (decl) == VAR_DECL
27746 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27747 && ix86_in_large_data_p (decl))
27748 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27751 /* Worker function for REVERSE_CONDITION. */
27754 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27756 return (mode != CCFPmode && mode != CCFPUmode
27757 ? reverse_condition (code)
27758 : reverse_condition_maybe_unordered (code));
27761 /* Output code to perform an x87 FP register move, from OPERANDS[1]
27765 output_387_reg_move (rtx insn, rtx *operands)
27767 if (REG_P (operands[0]))
27769 if (REG_P (operands[1])
27770 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27772 if (REGNO (operands[0]) == FIRST_STACK_REG)
27773 return output_387_ffreep (operands, 0);
27774 return "fstp\t%y0";
27776 if (STACK_TOP_P (operands[0]))
27777 return "fld%z1\t%y1";
27780 else if (MEM_P (operands[0]))
27782 gcc_assert (REG_P (operands[1]));
27783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27784 return "fstp%z0\t%y0";
27787 /* There is no non-popping store to memory for XFmode.
27788 So if we need one, follow the store with a load. */
27789 if (GET_MODE (operands[0]) == XFmode)
27790 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
27792 return "fst%z0\t%y0";
27799 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27800 FP status register is set. */
27803 ix86_emit_fp_unordered_jump (rtx label)
27805 rtx reg = gen_reg_rtx (HImode);
27808 emit_insn (gen_x86_fnstsw_1 (reg));
27810 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
27812 emit_insn (gen_x86_sahf_1 (reg));
27814 temp = gen_rtx_REG (CCmode, FLAGS_REG);
27815 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
27819 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27821 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27822 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27825 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
27826 gen_rtx_LABEL_REF (VOIDmode, label),
27828 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
27830 emit_jump_insn (temp);
27831 predict_jump (REG_BR_PROB_BASE * 10 / 100);
27834 /* Output code to perform a log1p XFmode calculation. */
27836 void ix86_emit_i387_log1p (rtx op0, rtx op1)
27838 rtx label1 = gen_label_rtx ();
27839 rtx label2 = gen_label_rtx ();
27841 rtx tmp = gen_reg_rtx (XFmode);
27842 rtx tmp2 = gen_reg_rtx (XFmode);
27844 emit_insn (gen_absxf2 (tmp, op1));
27845 emit_insn (gen_cmpxf (tmp,
27846 CONST_DOUBLE_FROM_REAL_VALUE (
27847 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
27849 emit_jump_insn (gen_bge (label1));
27851 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27852 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
27853 emit_jump (label2);
27855 emit_label (label1);
27856 emit_move_insn (tmp, CONST1_RTX (XFmode));
27857 emit_insn (gen_addxf3 (tmp, op1, tmp));
27858 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27859 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
27861 emit_label (label2);
27864 /* Output code to perform a Newton-Rhapson approximation of a single precision
27865 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
27867 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
27869 rtx x0, x1, e0, e1, two;
27871 x0 = gen_reg_rtx (mode);
27872 e0 = gen_reg_rtx (mode);
27873 e1 = gen_reg_rtx (mode);
27874 x1 = gen_reg_rtx (mode);
27876 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
27878 if (VECTOR_MODE_P (mode))
27879 two = ix86_build_const_vector (SFmode, true, two);
27881 two = force_reg (mode, two);
27883 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
27885 /* x0 = rcp(b) estimate */
27886 emit_insn (gen_rtx_SET (VOIDmode, x0,
27887 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
27890 emit_insn (gen_rtx_SET (VOIDmode, e0,
27891 gen_rtx_MULT (mode, x0, b)));
27893 emit_insn (gen_rtx_SET (VOIDmode, e1,
27894 gen_rtx_MINUS (mode, two, e0)));
27896 emit_insn (gen_rtx_SET (VOIDmode, x1,
27897 gen_rtx_MULT (mode, x0, e1)));
27899 emit_insn (gen_rtx_SET (VOIDmode, res,
27900 gen_rtx_MULT (mode, a, x1)));
27903 /* Output code to perform a Newton-Rhapson approximation of a
27904 single precision floating point [reciprocal] square root. */
27906 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
27909 rtx x0, e0, e1, e2, e3, mthree, mhalf;
27912 x0 = gen_reg_rtx (mode);
27913 e0 = gen_reg_rtx (mode);
27914 e1 = gen_reg_rtx (mode);
27915 e2 = gen_reg_rtx (mode);
27916 e3 = gen_reg_rtx (mode);
27918 real_from_integer (&r, VOIDmode, -3, -1, 0);
27919 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27921 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
27922 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27924 if (VECTOR_MODE_P (mode))
27926 mthree = ix86_build_const_vector (SFmode, true, mthree);
27927 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
27930 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
27931 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
27933 /* x0 = rsqrt(a) estimate */
27934 emit_insn (gen_rtx_SET (VOIDmode, x0,
27935 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
27938 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
27943 zero = gen_reg_rtx (mode);
27944 mask = gen_reg_rtx (mode);
27946 zero = force_reg (mode, CONST0_RTX(mode));
27947 emit_insn (gen_rtx_SET (VOIDmode, mask,
27948 gen_rtx_NE (mode, zero, a)));
27950 emit_insn (gen_rtx_SET (VOIDmode, x0,
27951 gen_rtx_AND (mode, x0, mask)));
27955 emit_insn (gen_rtx_SET (VOIDmode, e0,
27956 gen_rtx_MULT (mode, x0, a)));
27958 emit_insn (gen_rtx_SET (VOIDmode, e1,
27959 gen_rtx_MULT (mode, e0, x0)));
27962 mthree = force_reg (mode, mthree);
27963 emit_insn (gen_rtx_SET (VOIDmode, e2,
27964 gen_rtx_PLUS (mode, e1, mthree)));
27966 mhalf = force_reg (mode, mhalf);
27968 /* e3 = -.5 * x0 */
27969 emit_insn (gen_rtx_SET (VOIDmode, e3,
27970 gen_rtx_MULT (mode, x0, mhalf)));
27972 /* e3 = -.5 * e0 */
27973 emit_insn (gen_rtx_SET (VOIDmode, e3,
27974 gen_rtx_MULT (mode, e0, mhalf)));
27975 /* ret = e2 * e3 */
27976 emit_insn (gen_rtx_SET (VOIDmode, res,
27977 gen_rtx_MULT (mode, e2, e3)));
27980 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  Emits the
   "@unwind" section marker required by Binutils 2.15 for .eh_frame,
   otherwise defers to the generic ELF section handler.  */
27982 static void ATTRIBUTE_UNUSED
27983 i386_solaris_elf_named_section (const char *name, unsigned int flags,
27986 /* With Binutils 2.15, the "@unwind" marker must be specified on
27987 every occurrence of the ".eh_frame" section, not just the first
/* NOTE(review): the condition guarding this strcmp is elided in this
   extract (line-number gap before 27990).  */
27990 && strcmp (name, ".eh_frame") == 0)
27992 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
27993 flags & SECTION_WRITE ? "aw" : "a");
/* Fallback: standard ELF .section directive.  */
27996 default_elf_asm_named_section (name, flags, decl);
27999 /* Return the mangling of TYPE if it is an extended fundamental type.
   Implements TARGET_MANGLE_TYPE: "g" for __float128 and "e" for
   long double/__float80 per the visible case comments; returns NULL
   (elided here) for types mangled in the default way.  */
28001 static const char *
28002 ix86_mangle_type (const_tree type)
/* Strip qualifiers so variants of the same type mangle identically.  */
28004 type = TYPE_MAIN_VARIANT (type);
/* Only void/bool/integer/real types can be extended fundamental types.  */
28006 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28007 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28010 switch (TYPE_MODE (type))
28013 /* __float128 is "g". */
28016 /* "long double" or __float80 is "e". */
28023 /* For 32-bit code we can save PIC register setup by using
28024 __stack_chk_fail_local hidden function instead of calling
28025 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28026 register, so it is better to call __stack_chk_fail directly. */
/* Implements TARGET_STACK_PROTECT_FAIL: pick the failure call sequence
   emitted when a stack-smashing canary check fails.  */
28029 ix86_stack_protect_fail (void)
28031 return TARGET_64BIT
28032 ? default_external_stack_protect_fail ()
28033 : default_hidden_stack_protect_fail ();
28036 /* Select a format to encode pointers in exception handling data. CODE
28037 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28038 true if the symbol may be affected by dynamic relocations.
28040 ??? All x86 object file formats are capable of representing this.
28041 After all, the relocation needed is the same as for the call insn.
28042 Whether or not a particular assembler allows us to enter such, I
28043 guess we'll have to see. */
28045 asm_preferred_eh_data_format (int code, int global)
/* PIC branch (guard elided in this extract): default to 8-byte signed
   pc-relative data, narrowed to 4 bytes for small/medium code models.  */
28049 int type = DW_EH_PE_sdata8;
28051 || ix86_cmodel == CM_SMALL_PIC
28052 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28053 type = DW_EH_PE_sdata4;
/* Global symbols additionally go through an indirection (GOT slot).  */
28054 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, or 4-byte unsigned data when the code
   model guarantees addresses fit in 32 bits.  */
28056 if (ix86_cmodel == CM_SMALL
28057 || (ix86_cmodel == CM_MEDIUM && code))
28058 return DW_EH_PE_udata4;
28059 return DW_EH_PE_absptr;
28062 /* Expand copysign from SIGN to the positive value ABS_VALUE
28063 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
   the sign bit (built by a previous call); if NULL a fresh sign-bit
   mask is built here.  RESULT = ABS_VALUE | (SIGN & signbit-mask).  */
28066 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28068 enum machine_mode mode = GET_MODE (sign);
28069 rtx sgn = gen_reg_rtx (mode);
28070 if (mask == NULL_RTX)
/* Build an inverted (false) sign-bit mask, i.e. only the sign bit set
   after the NOT applied below.  */
28072 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28073 if (!VECTOR_MODE_P (mode))
28075 /* We need to generate a scalar mode mask in this case. */
28076 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28077 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28078 mask = gen_reg_rtx (mode);
28079 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* When the mask was freshly built above it is inverted here (ANDN form);
   the else-branch structure is elided in this extract.  */
28083 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign's sign bit isolated.  */
28084 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28085 gen_rtx_AND (mode, mask, sign)));
/* result = abs_value with sign's sign bit ORed in.  */
28086 emit_insn (gen_rtx_SET (VOIDmode, result,
28087 gen_rtx_IOR (mode, abs_value, sgn)));
28090 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28091 mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null (so callers can reuse it for a later copysign).  */
28094 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28096 enum machine_mode mode = GET_MODE (op0);
28099 xa = gen_reg_rtx (mode);
/* Mask with all bits set except the sign bit (invert = true).  */
28100 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28101 if (!VECTOR_MODE_P (mode))
28103 /* We need to generate a scalar mode mask in this case. */
28104 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28105 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28106 mask = gen_reg_rtx (mode);
28107 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 with the sign bit cleared, i.e. fabs (op0).  */
28109 emit_insn (gen_rtx_SET (VOIDmode, xa,
28110 gen_rtx_AND (mode, op0, mask)));
28118 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28119 swapping the operands if SWAP_OPERANDS is true. The expanded
28120 code is a forward jump to a newly created label in case the
28121 comparison is true. The generated label rtx is returned. */
28123 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28124 bool swap_operands)
28135 label = gen_label_rtx ();
/* Compare in CCFPUmode (unordered FP flags) via the FLAGS register.  */
28136 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28137 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28138 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
/* Conditional jump to LABEL when CODE holds on the flags.  */
28139 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28140 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28141 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28142 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28143 JUMP_LABEL (tmp) = label;
28148 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28149 using comparison code CODE. Operands are swapped for the comparison if
28150 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28152 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28153 bool swap_operands)
28155 enum machine_mode mode = GET_MODE (op0);
28156 rtx mask = gen_reg_rtx (mode);
/* cmpsd/cmpss produce an all-ones/all-zeros mask in an XMM register.  */
28165 if (mode == DFmode)
28166 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28167 gen_rtx_fmt_ee (code, mode, op0, op1)));
28169 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28170 gen_rtx_fmt_ee (code, mode, op0, op1)));
28175 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28176 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.
   (2**52 for double, 2**23 for float — the threshold above which all
   representable values are already integers.)  */
28178 ix86_gen_TWO52 (enum machine_mode mode)
28180 REAL_VALUE_TYPE TWO52r;
28183 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28184 TWO52 = const_double_from_real_value (TWO52r, mode);
28185 TWO52 = force_reg (mode, TWO52);
28190 /* Expand SSE sequence for computing lround from OP1 storing
   into OP0 (round-to-nearest then convert to integer).  */
28193 ix86_expand_lround (rtx op0, rtx op1)
28195 /* C code for the stuff we're doing below:
28196 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28199 enum machine_mode mode = GET_MODE (op1);
28200 const struct real_format *fmt;
28201 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28204 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p-1), the largest value below 0.5, so that
   exact .5 inputs round away from zero correctly after truncation.  */
28205 fmt = REAL_MODE_FORMAT (mode);
28206 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28207 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28209 /* adj = copysign (0.5, op1) */
28210 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28211 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28213 /* adj = op1 + adj */
28214 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28216 /* op0 = (imode)adj */
/* Truncating fix conversion completes the rounding.  */
28217 expand_fix (op0, adj, 0);
28220 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0 — despite the comment, this expands lfloor/lceil:
   convert-to-integer then compensate by +/-1 when the round-trip
   comparison shows the truncation went the wrong way.  */
28223 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28225 /* C code for the stuff we're doing below (for do_floor):
28227 xi -= (double)xi > op1 ? 1 : 0;
28230 enum machine_mode fmode = GET_MODE (op1);
28231 enum machine_mode imode = GET_MODE (op0);
28232 rtx ireg, freg, label, tmp;
28234 /* reg = (long)op1 */
28235 ireg = gen_reg_rtx (imode);
28236 expand_fix (ireg, op1, 0);
28238 /* freg = (double)reg */
28239 freg = gen_reg_rtx (fmode);
28240 expand_float (freg, ireg, 0);
28242 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump over the compensation when no adjustment is needed; operands
   are swapped for the ceil case (!do_floor).  */
28243 label = ix86_expand_sse_compare_and_jump (UNLE,
28244 freg, op1, !do_floor);
28245 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28246 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28247 emit_move_insn (ireg, tmp);
28249 emit_label (label);
28250 LABEL_NUSES (label) = 1;
28252 emit_move_insn (op0, ireg);
28255 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28256 result in OPERAND0. */
28258 ix86_expand_rint (rtx operand0, rtx operand1)
28260 /* C code for the stuff we're doing below:
28261 xa = fabs (operand1);
28262 if (!isless (xa, 2**52))
28264 xa = xa + 2**52 - 2**52;
28265 return copysign (xa, operand1);
28267 enum machine_mode mode = GET_MODE (operand0);
28268 rtx res, xa, label, TWO52, mask;
28270 res = gen_reg_rtx (mode);
28271 emit_move_insn (res, operand1);
28273 /* xa = abs (operand1) */
28274 xa = ix86_expand_sse_fabs (res, &mask);
28276 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (or NaN) are already integral — skip the rounding.  */
28277 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**52 rounds to integer in the current
   rounding mode (round-to-nearest).  */
28280 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28281 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0).  */
28283 ix86_sse_copysign_to_positive (res, xa, res, mask);
28285 emit_label (label);
28286 LABEL_NUSES (label) = 1;
28288 emit_move_insn (operand0, res);
28291 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0 (do_floor selects floor vs ceil).  This variant avoids
   DImode conversions and so also works for DFmode on 32-bit targets.  */
28294 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28296 /* C code for the stuff we expand below.
28297 double xa = fabs (x), x2;
28298 if (!isless (xa, TWO52))
28300 xa = xa + TWO52 - TWO52;
28301 x2 = copysign (xa, x);
28310 enum machine_mode mode = GET_MODE (operand0);
28311 rtx xa, TWO52, tmp, label, one, res, mask;
28313 TWO52 = ix86_gen_TWO52 (mode);
28315 /* Temporary for holding the result, initialized to the input
28316 operand to ease control flow. */
28317 res = gen_reg_rtx (mode);
28318 emit_move_insn (res, operand1);
28320 /* xa = abs (operand1) */
28321 xa = ix86_expand_sse_fabs (res, &mask);
28323 /* if (!isless (xa, TWO52)) goto label; */
28324 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28326 /* xa = xa + TWO52 - TWO52; */
/* Round to nearest integer via the 2**52 trick.  */
28327 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28328 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28330 /* xa = copysign (xa, operand1) */
28331 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28333 /* generate 1.0 or -1.0 */
28334 one = force_reg (mode,
28335 const_double_from_real_value (do_floor
28336 ? dconst1 : dconstm1, mode));
28338 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28339 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28340 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28341 gen_rtx_AND (mode, one, tmp)));
28342 /* We always need to subtract here to preserve signed zero. */
28343 tmp = expand_simple_binop (mode, MINUS,
28344 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28345 emit_move_insn (res, tmp);
28347 emit_label (label);
28348 LABEL_NUSES (label) = 1;
28350 emit_move_insn (operand0, res);
28353 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0 (do_floor selects floor vs ceil).  Uses fix/float
   round-trip through an integer register, so DFmode needs DImode
   (64-bit targets).  */
28356 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28358 /* C code for the stuff we expand below.
28359 double xa = fabs (x), x2;
28360 if (!isless (xa, TWO52))
28362 x2 = (double)(long)x;
28369 if (HONOR_SIGNED_ZEROS (mode))
28370 return copysign (x2, x);
28373 enum machine_mode mode = GET_MODE (operand0);
28374 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28376 TWO52 = ix86_gen_TWO52 (mode);
28378 /* Temporary for holding the result, initialized to the input
28379 operand to ease control flow. */
28380 res = gen_reg_rtx (mode);
28381 emit_move_insn (res, operand1);
28383 /* xa = abs (operand1) */
28384 xa = ix86_expand_sse_fabs (res, &mask);
28386 /* if (!isless (xa, TWO52)) goto label; */
28387 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28389 /* xa = (double)(long)x */
/* Truncate toward zero and convert back to floating point.  */
28390 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28391 expand_fix (xi, res, 0);
28392 expand_float (xa, xi, 0);
28395 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28397 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28398 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28399 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28400 gen_rtx_AND (mode, one, tmp)));
28401 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28402 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28403 emit_move_insn (res, tmp);
/* Restore the input's sign so floor(-0.0) == -0.0 when required.  */
28405 if (HONOR_SIGNED_ZEROS (mode))
28406 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28408 emit_label (label);
28409 LABEL_NUSES (label) = 1;
28411 emit_move_insn (operand0, res);
28414 /* Expand SSE sequence for computing round from OPERAND1 storing
28415 into OPERAND0. Sequence that works without relying on DImode truncation
28416 via cvttsd2siq that is only available on 64bit targets. */
28418 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28420 /* C code for the stuff we expand below.
28421 double xa = fabs (x), xa2, x2;
28422 if (!isless (xa, TWO52))
28424 Using the absolute value and copying back sign makes
28425 -0.0 -> -0.0 correct.
28426 xa2 = xa + TWO52 - TWO52;
28431 else if (dxa > 0.5)
28433 x2 = copysign (xa2, x);
28436 enum machine_mode mode = GET_MODE (operand0);
28437 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28439 TWO52 = ix86_gen_TWO52 (mode);
28441 /* Temporary for holding the result, initialized to the input
28442 operand to ease control flow. */
28443 res = gen_reg_rtx (mode);
28444 emit_move_insn (res, operand1);
28446 /* xa = abs (operand1) */
28447 xa = ix86_expand_sse_fabs (res, &mask);
28449 /* if (!isless (xa, TWO52)) goto label; */
28450 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28452 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the 2**52 trick; dxa below measures the
   rounding error so ties can be corrected to round-half-away.  */
28453 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28454 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28456 /* dxa = xa2 - xa; */
28457 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28459 /* generate 0.5, 1.0 and -0.5 */
28460 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28461 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28462 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28466 tmp = gen_reg_rtx (mode);
28467 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28468 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28469 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28470 gen_rtx_AND (mode, one, tmp)));
28471 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28472 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28473 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28474 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28475 gen_rtx_AND (mode, one, tmp)));
28476 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28478 /* res = copysign (xa2, operand1) */
28479 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28481 emit_label (label);
28482 LABEL_NUSES (label) = 1;
28484 emit_move_insn (operand0, res);
28487 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, via a truncating fix/float round-trip.  */
28490 ix86_expand_trunc (rtx operand0, rtx operand1)
28492 /* C code for SSE variant we expand below.
28493 double xa = fabs (x), x2;
28494 if (!isless (xa, TWO52))
28496 x2 = (double)(long)x;
28497 if (HONOR_SIGNED_ZEROS (mode))
28498 return copysign (x2, x);
28501 enum machine_mode mode = GET_MODE (operand0);
28502 rtx xa, xi, TWO52, label, res, mask;
28504 TWO52 = ix86_gen_TWO52 (mode);
28506 /* Temporary for holding the result, initialized to the input
28507 operand to ease control flow. */
28508 res = gen_reg_rtx (mode);
28509 emit_move_insn (res, operand1);
28511 /* xa = abs (operand1) */
28512 xa = ix86_expand_sse_fabs (res, &mask);
28514 /* if (!isless (xa, TWO52)) goto label; */
28515 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28517 /* x = (double)(long)x */
/* cvttsd2si/cvttss2si truncates toward zero, which is exactly trunc.  */
28518 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28519 expand_fix (xi, res, 0);
28520 expand_float (res, xi, 0);
28522 if (HONOR_SIGNED_ZEROS (mode))
28523 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28525 emit_label (label);
28526 LABEL_NUSES (label) = 1;
28528 emit_move_insn (operand0, res);
28531 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  32-bit-safe DFmode variant: rounds with the 2**52
   trick, then subtracts 1 if rounding moved away from zero.  */
28534 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28536 enum machine_mode mode = GET_MODE (operand0);
28537 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28539 /* C code for SSE variant we expand below.
28540 double xa = fabs (x), x2;
28541 if (!isless (xa, TWO52))
28543 xa2 = xa + TWO52 - TWO52;
28547 x2 = copysign (xa2, x);
28551 TWO52 = ix86_gen_TWO52 (mode);
28553 /* Temporary for holding the result, initialized to the input
28554 operand to ease control flow. */
28555 res = gen_reg_rtx (mode);
28556 emit_move_insn (res, operand1);
28558 /* xa = abs (operand1) */
28559 xa = ix86_expand_sse_fabs (res, &smask);
28561 /* if (!isless (xa, TWO52)) goto label; */
28562 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28564 /* res = xa + TWO52 - TWO52; */
28565 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28566 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28567 emit_move_insn (res, tmp);
28570 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28572 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went up past the (positive) input, pull back by 1 so the
   overall effect is truncation toward zero.  */
28573 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28574 emit_insn (gen_rtx_SET (VOIDmode, mask,
28575 gen_rtx_AND (mode, mask, one)));
28576 tmp = expand_simple_binop (mode, MINUS,
28577 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28578 emit_move_insn (res, tmp);
28580 /* res = copysign (res, operand1) */
28581 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28583 emit_label (label);
28584 LABEL_NUSES (label) = 1;
28586 emit_move_insn (operand0, res);
28589 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0: add just-under-0.5 to |x|, truncate, restore sign.
   Relies on DImode fix conversion for DFmode (64-bit targets).  */
28592 ix86_expand_round (rtx operand0, rtx operand1)
28594 /* C code for the stuff we're doing below:
28595 double xa = fabs (x);
28596 if (!isless (xa, TWO52))
28598 xa = (double)(long)(xa + nextafter (0.5, 0.0));
28599 return copysign (xa, x);
28601 enum machine_mode mode = GET_MODE (operand0);
28602 rtx res, TWO52, xa, label, xi, half, mask;
28603 const struct real_format *fmt;
28604 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28606 /* Temporary for holding the result, initialized to the input
28607 operand to ease control flow. */
28608 res = gen_reg_rtx (mode);
28609 emit_move_insn (res, operand1);
28611 TWO52 = ix86_gen_TWO52 (mode);
28612 xa = ix86_expand_sse_fabs (res, &mask);
28613 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28615 /* load nextafter (0.5, 0.0) */
/* Using the predecessor of 0.5 avoids rounding x.49999... up twice.  */
28616 fmt = REAL_MODE_FORMAT (mode);
28617 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28618 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28620 /* xa = xa + 0.5 */
28621 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28622 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28624 /* xa = (double)(int64_t)xa */
28625 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28626 expand_fix (xi, xa, 0);
28627 expand_float (xa, xi, 0);
28629 /* res = copysign (xa, operand1) */
28630 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28632 emit_label (label);
28633 LABEL_NUSES (label) = 1;
28635 emit_move_insn (operand0, res);
28639 /* Validate whether a SSE5 instruction is valid or not.
28640 OPERANDS is the array of operands.
28641 NUM is the number of operands.
28642 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
28643 NUM_MEMORY is the maximum number of memory operands to accept.
28644 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
28647 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
28648 bool uses_oc0, int num_memory, bool commutative)
28654 /* Count the number of memory arguments */
/* mem_mask gets bit i set for each memory operand i; mem_count tallies
   them (the count increment is elided in this extract).  */
28657 for (i = 0; i < num; i++)
28659 enum machine_mode mode = GET_MODE (operands[i]);
28660 if (register_operand (operands[i], mode))
28663 else if (memory_operand (operands[i], mode))
28665 mem_mask |= (1 << i);
/* Neither register nor memory: only a CONST0 operand inside a pcmov
   (IF_THEN_ELSE) pattern is tolerated.  */
28671 rtx pattern = PATTERN (insn);
28673 /* allow 0 for pcmov */
28674 if (GET_CODE (pattern) != SET
28675 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
28677 || operands[i] != CONST0_RTX (mode))
28682 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
28683 a memory operation. */
/* A negative NUM_MEMORY encodes "last operand may be memory for free":
   strip that operand from the mask/count before the generic checks.  */
28684 if (num_memory < 0)
28686 num_memory = -num_memory;
28687 if ((mem_mask & (1 << (num-1))) != 0)
28689 mem_mask &= ~(1 << (num-1));
28694 /* If there were no memory operations, allow the insn */
28698 /* Do not allow the destination register to be a memory operand. */
28699 else if (mem_mask & (1 << 0))
28702 /* If there are too many memory operations, disallow the instruction. While
28703 the hardware only allows 1 memory reference, before register allocation
28704 for some insns, we allow two memory operations sometimes in order to allow
28705 code like the following to be optimized:
28707 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
28709 or similar cases that are vectorized into using the fmaddss
28711 else if (mem_count > num_memory)
28714 /* Don't allow more than one memory operation if not optimizing. */
28715 else if (mem_count > 1 && !optimize)
28718 else if (num == 4 && mem_count == 1)
28720 /* formats (destination is the first argument), example fmaddss:
28721 xmm1, xmm1, xmm2, xmm3/mem
28722 xmm1, xmm1, xmm2/mem, xmm3
28723 xmm1, xmm2, xmm3/mem, xmm1
28724 xmm1, xmm2/mem, xmm3, xmm1 */
28726 return ((mem_mask == (1 << 1))
28727 || (mem_mask == (1 << 2))
28728 || (mem_mask == (1 << 3)));
28730 /* format, example pmacsdd:
28731 xmm1, xmm2, xmm3/mem, xmm1 */
28733 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
28735 return (mem_mask == (1 << 2));
28738 else if (num == 4 && num_memory == 2)
28740 /* If there are two memory operations, we can load one of the memory ops
28741 into the destination register. This is for optimizing the
28742 multiply/add ops, which the combiner has optimized both the multiply
28743 and the add insns to have a memory operation. We have to be careful
28744 that the destination doesn't overlap with the inputs. */
28745 rtx op0 = operands[0];
28747 if (reg_mentioned_p (op0, operands[1])
28748 || reg_mentioned_p (op0, operands[2])
28749 || reg_mentioned_p (op0, operands[3]))
28752 /* formats (destination is the first argument), example fmaddss:
28753 xmm1, xmm1, xmm2, xmm3/mem
28754 xmm1, xmm1, xmm2/mem, xmm3
28755 xmm1, xmm2, xmm3/mem, xmm1
28756 xmm1, xmm2/mem, xmm3, xmm1
28758 For the oc0 case, we will load either operands[1] or operands[3] into
28759 operands[0], so any combination of 2 memory operands is ok. */
28763 /* format, example pmacsdd:
28764 xmm1, xmm2, xmm3/mem, xmm1
28766 For the integer multiply/add instructions be more restrictive and
28767 require operands[2] and operands[3] to be the memory operands. */
/* BUG: the second disjunct below is missing "mem_mask ==".
   ((1 << 2) | (1 << 3)) is the nonzero constant 12, so the whole
   expression is unconditionally true.  Intended:
     mem_mask == ((1 << 1) | (1 << 3))
     || mem_mask == ((1 << 2) | (1 << 3)).  */
28769 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
28771 return (mem_mask == ((1 << 2) | (1 << 3)));
28774 else if (num == 3 && num_memory == 1)
28776 /* formats, example protb:
28777 xmm1, xmm2, xmm3/mem
28778 xmm1, xmm2/mem, xmm3 */
28780 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
28782 /* format, example comeq:
28783 xmm1, xmm2, xmm3/mem */
28785 return (mem_mask == (1 << 2));
/* All valid (num, num_memory) combinations are handled above.  */
28789 gcc_unreachable ();
28795 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
28796 hardware will allow by using the destination register to load one of the
28797 memory operations. Presently this is used by the multiply/add routines to
28798 allow 2 memory references. */
28801 ix86_expand_sse5_multiple_memory (rtx operands[],
28803 enum machine_mode mode)
28805 rtx op0 = operands[0];
/* Sanity checks: the destination must be a register not mentioned by
   any input (first condition of this || chain is elided here).  */
28807 || memory_operand (op0, mode)
28808 || reg_mentioned_p (op0, operands[1])
28809 || reg_mentioned_p (op0, operands[2])
28810 || reg_mentioned_p (op0, operands[3]))
28811 gcc_unreachable ();
28813 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
28814 the destination register. */
28815 if (memory_operand (operands[1], mode))
28817 emit_move_insn (op0, operands[1]);
28820 else if (memory_operand (operands[3], mode))
28822 emit_move_insn (op0, operands[3]);
/* Callers guarantee one of operands[1]/operands[3] is memory.  */
28826 gcc_unreachable ();
28832 /* Table of valid machine attributes.  Terminated by the all-NULL
   sentinel entry; consumed via TARGET_ATTRIBUTE_TABLE.  */
28833 static const struct attribute_spec ix86_attribute_table[] =
28835 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28836 /* Stdcall attribute says callee is responsible for popping arguments
28837 if they are not variable. */
28838 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28839 /* Fastcall attribute says callee is responsible for popping arguments
28840 if they are not variable. */
28841 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28842 /* Cdecl attribute says the callee is a normal C declaration */
28843 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28844 /* Regparm attribute specifies how many integer arguments are to be
28845 passed in registers. */
28846 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
28847 /* Sseregparm attribute says we are using x86_64 calling conventions
28848 for FP arguments. */
28849 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28850 /* force_align_arg_pointer says this function realigns the stack at entry. */
28851 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28852 false, true, true, ix86_handle_cconv_attribute },
28853 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28854 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28855 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28856 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the record layout convention.  */
28858 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28859 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28860 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28861 SUBTARGET_ATTRIBUTE_TABLE,
28863 /* ms_abi and sysv_abi calling convention function attributes. */
28864 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28865 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* End-of-table sentinel.  */
28867 { NULL, 0, 0, false, false, false, NULL }
28870 /* Implement targetm.vectorize.builtin_vectorization_cost.
   RUNTIME_TEST is true when the cost being asked for is that of the
   runtime guard branch emitted before a vectorized loop.  */
28872 x86_builtin_vectorization_cost (bool runtime_test)
28874 /* If the branch of the runtime test is taken - i.e. - the vectorized
28875 version is skipped - this incurs a misprediction cost (because the
28876 vectorized version is expected to be the fall-through). So we subtract
28877 the latency of a mispredicted branch from the costs that are incured
28878 when the vectorized version is executed.
28880 TODO: The values in individual target tables have to be tuned or new
28881 fields may be needed. For eg. on K8, the default branch path is the
28882 not-taken path. If the taken path is predicted correctly, the minimum
28883 penalty of going down the taken-path is 1 cycle. If the taken-path is
28884 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative: credited against the vectorized-version cost.  The
   non-runtime_test return (0) is elided in this extract.  */
28888 return (-(ix86_cost->cond_taken_branch_cost));
28894 /* This function returns the calling abi specific va_list type node.
28895 It returns the FNDECL specific va_list type.
   On 32-bit (guard elided) the generic va_list is returned; on 64-bit
   the MS or SysV variant is chosen from the function's ABI.  */
28898 ix86_fn_abi_va_list (tree fndecl)
28903 return va_list_type_node;
28904 gcc_assert (fndecl != NULL_TREE);
28905 abi = ix86_function_abi ((const_tree) fndecl);
28908 return ms_va_list_type_node;
28910 return sysv_va_list_type_node;
28913 /* Returns the canonical va_list type specified by TYPE. If there
28914 is no valid TYPE provided, it return NULL_TREE.
   Tries, in order, the generic va_list, the SysV va_list and the MS
   va_list, unwrapping array decay the same way for each candidate,
   and falls back to the generic std_canonical_va_list_type.  */
28917 ix86_canonical_va_list_type (tree type)
28921 /* Resolve references and pointers to va_list type. */
28922 if (INDIRECT_REF_P (type))
28923 type = TREE_TYPE (type);
28924 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
28925 type = TREE_TYPE (type);
/* Candidate 1: the generic __builtin_va_list.  */
28929 wtype = va_list_type_node;
28930 gcc_assert (wtype != NULL_TREE);
28932 if (TREE_CODE (wtype) == ARRAY_TYPE)
28934 /* If va_list is an array type, the argument may have decayed
28935 to a pointer type, e.g. by being passed to another function.
28936 In that case, unwrap both types so that we can compare the
28937 underlying records. */
28938 if (TREE_CODE (htype) == ARRAY_TYPE
28939 || POINTER_TYPE_P (htype))
28941 wtype = TREE_TYPE (wtype);
28942 htype = TREE_TYPE (htype);
28945 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28946 return va_list_type_node;
/* Candidate 2: the SysV-ABI __builtin_sysv_va_list.  */
28947 wtype = sysv_va_list_type_node;
28948 gcc_assert (wtype != NULL_TREE);
28950 if (TREE_CODE (wtype) == ARRAY_TYPE)
28952 /* If va_list is an array type, the argument may have decayed
28953 to a pointer type, e.g. by being passed to another function.
28954 In that case, unwrap both types so that we can compare the
28955 underlying records. */
28956 if (TREE_CODE (htype) == ARRAY_TYPE
28957 || POINTER_TYPE_P (htype))
28959 wtype = TREE_TYPE (wtype);
28960 htype = TREE_TYPE (htype);
28963 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28964 return sysv_va_list_type_node;
/* Candidate 3: the MS-ABI __builtin_ms_va_list.  */
28965 wtype = ms_va_list_type_node;
28966 gcc_assert (wtype != NULL_TREE);
28968 if (TREE_CODE (wtype) == ARRAY_TYPE)
28970 /* If va_list is an array type, the argument may have decayed
28971 to a pointer type, e.g. by being passed to another function.
28972 In that case, unwrap both types so that we can compare the
28973 underlying records. */
28974 if (TREE_CODE (htype) == ARRAY_TYPE
28975 || POINTER_TYPE_P (htype))
28977 wtype = TREE_TYPE (wtype);
28978 htype = TREE_TYPE (htype);
28981 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28982 return ms_va_list_type_node;
/* No target-specific match: defer to the generic handler.  */
28985 return std_canonical_va_list_type (type);
28988 /* Iterate through the target-specific builtin types for va_list.
28989 IDX denotes the iterator, *PTREE is set to the result type of
28990 the va_list builtin, and *PNAME to its internal type.
28991 Returns zero if there is no element for this index, otherwise
28992 IDX should be increased upon the next call.
28993 Note, do not iterate a base builtin's name like __builtin_va_list.
28994 Used from c_common_nodes_and_builtins. */
28997 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* idx 0 -> MS va_list, idx 1 -> SysV va_list (switch/case structure
   elided in this extract).  */
29003 *ptree = ms_va_list_type_node;
29004 *pname = "__builtin_ms_va_list";
29007 *ptree = sysv_va_list_type_node;
29008 *pname = "__builtin_sysv_va_list";
29016 /* Initialize the GCC target structure. */
29017 #undef TARGET_RETURN_IN_MEMORY
29018 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29020 #undef TARGET_ATTRIBUTE_TABLE
29021 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29022 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29023 # undef TARGET_MERGE_DECL_ATTRIBUTES
29024 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29027 #undef TARGET_COMP_TYPE_ATTRIBUTES
29028 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29030 #undef TARGET_INIT_BUILTINS
29031 #define TARGET_INIT_BUILTINS ix86_init_builtins
29032 #undef TARGET_EXPAND_BUILTIN
29033 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29035 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29036 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29037 ix86_builtin_vectorized_function
29039 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29040 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29042 #undef TARGET_BUILTIN_RECIPROCAL
29043 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29045 #undef TARGET_ASM_FUNCTION_EPILOGUE
29046 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29048 #undef TARGET_ENCODE_SECTION_INFO
29049 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29050 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29052 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29055 #undef TARGET_ASM_OPEN_PAREN
29056 #define TARGET_ASM_OPEN_PAREN ""
29057 #undef TARGET_ASM_CLOSE_PAREN
29058 #define TARGET_ASM_CLOSE_PAREN ""
29060 #undef TARGET_ASM_ALIGNED_HI_OP
29061 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29062 #undef TARGET_ASM_ALIGNED_SI_OP
29063 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29065 #undef TARGET_ASM_ALIGNED_DI_OP
29066 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29069 #undef TARGET_ASM_UNALIGNED_HI_OP
29070 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29071 #undef TARGET_ASM_UNALIGNED_SI_OP
29072 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29073 #undef TARGET_ASM_UNALIGNED_DI_OP
29074 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29076 #undef TARGET_SCHED_ADJUST_COST
29077 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29078 #undef TARGET_SCHED_ISSUE_RATE
29079 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29080 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29081 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29082 ia32_multipass_dfa_lookahead
29084 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29085 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29088 #undef TARGET_HAVE_TLS
29089 #define TARGET_HAVE_TLS true
29091 #undef TARGET_CANNOT_FORCE_CONST_MEM
29092 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29093 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29094 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29096 #undef TARGET_DELEGITIMIZE_ADDRESS
29097 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29099 #undef TARGET_MS_BITFIELD_LAYOUT_P
29100 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29103 #undef TARGET_BINDS_LOCAL_P
29104 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29106 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29107 #undef TARGET_BINDS_LOCAL_P
29108 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29111 #undef TARGET_ASM_OUTPUT_MI_THUNK
29112 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29113 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29114 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29116 #undef TARGET_ASM_FILE_START
29117 #define TARGET_ASM_FILE_START x86_file_start
29119 #undef TARGET_DEFAULT_TARGET_FLAGS
29120 #define TARGET_DEFAULT_TARGET_FLAGS \
29122 | TARGET_SUBTARGET_DEFAULT \
29123 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29125 #undef TARGET_HANDLE_OPTION
29126 #define TARGET_HANDLE_OPTION ix86_handle_option
29128 #undef TARGET_RTX_COSTS
29129 #define TARGET_RTX_COSTS ix86_rtx_costs
29130 #undef TARGET_ADDRESS_COST
29131 #define TARGET_ADDRESS_COST ix86_address_cost
29133 #undef TARGET_FIXED_CONDITION_CODE_REGS
29134 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29135 #undef TARGET_CC_MODES_COMPATIBLE
29136 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29138 #undef TARGET_MACHINE_DEPENDENT_REORG
29139 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29141 #undef TARGET_BUILD_BUILTIN_VA_LIST
29142 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29144 #undef TARGET_FN_ABI_VA_LIST
29145 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29147 #undef TARGET_CANONICAL_VA_LIST_TYPE
29148 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29150 #undef TARGET_EXPAND_BUILTIN_VA_START
29151 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29153 #undef TARGET_MD_ASM_CLOBBERS
29154 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29156 #undef TARGET_PROMOTE_PROTOTYPES
29157 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29158 #undef TARGET_STRUCT_VALUE_RTX
29159 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29160 #undef TARGET_SETUP_INCOMING_VARARGS
29161 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29162 #undef TARGET_MUST_PASS_IN_STACK
29163 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29164 #undef TARGET_PASS_BY_REFERENCE
29165 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29166 #undef TARGET_INTERNAL_ARG_POINTER
29167 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29168 #undef TARGET_UPDATE_STACK_BOUNDARY
29169 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29170 #undef TARGET_GET_DRAP_RTX
29171 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29172 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29173 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29174 #undef TARGET_STRICT_ARGUMENT_NAMING
29175 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29177 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29178 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29180 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29181 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29183 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29184 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29186 #undef TARGET_C_MODE_FOR_SUFFIX
29187 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29190 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29191 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29194 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29195 #undef TARGET_INSERT_ATTRIBUTES
29196 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29199 #undef TARGET_MANGLE_TYPE
29200 #define TARGET_MANGLE_TYPE ix86_mangle_type
29202 #undef TARGET_STACK_PROTECT_FAIL
29203 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29205 #undef TARGET_FUNCTION_VALUE
29206 #define TARGET_FUNCTION_VALUE ix86_function_value
29208 #undef TARGET_SECONDARY_RELOAD
29209 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29211 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29212 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29214 #undef TARGET_SET_CURRENT_FUNCTION
29215 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29217 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29218 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29220 #undef TARGET_OPTION_SAVE
29221 #define TARGET_OPTION_SAVE ix86_function_specific_save
29223 #undef TARGET_OPTION_RESTORE
29224 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29226 #undef TARGET_OPTION_PRINT
29227 #define TARGET_OPTION_PRINT ix86_function_specific_print
29229 #undef TARGET_OPTION_CAN_INLINE_P
29230 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29232 struct gcc_target targetm = TARGET_INITIALIZER;
29234 #include "gt-i386.h"