1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default for CHECK_STACK_LIMIT when the target headers do not provide
   one; -1 presumably means "no explicit limit" -- verify against the
   stack-checking code that consumes it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   tables below have five entries: QImode, HImode, SImode, DImode and
   "other"; anything else maps to the final "other" slot (index 4).  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
73 /* Processor costs (relative to an add) */
/* Size-tuning analogue of COSTS_N_INSNS: COSTS_N_INSNS is assumed to be
   (N)*4 and an addition is assumed to be 2 bytes long, so a cost of N
   "adds" corresponds to 2*N bytes.  */
#define COSTS_N_BYTES(N) (2 * (N))
/* Filler stringop-algorithm descriptor: always fall back to a library
   call.  Presumably used for the size variant (32- vs 64-bit) that a
   given cost table does not tune separately -- verify against the
   stringop_algs consumers.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 this cost however our current implementation of synth_mult results in
1045 use of unnecessary temporary registers causing regression on several
1046 SPECfp benchmarks. */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
1090 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
1091 is increased to perhaps more appropriate value of 5. */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect; starts at pentium_cost and is
   presumably repointed during option processing to the table matching
   the selected -mtune CPU -- confirm in the options-override code.  */
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1189 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together in the tuning tables to say "this applies to these CPUs".  */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience unions of related AMD-family CPUs.  */
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1210 /* Generic instruction choice should be common subset of supported CPUs
1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1214 /* Feature tests against the various tunings. */
/* Indexed by X86_TUNE_*; populated from initial_ix86_tune_features
   below based on the processor mask (presumably during option
   processing -- confirm where it is filled in).  */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 negatively, so enabling for Generic64 seems like good code size
1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 work well with PPro base chips. */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 on simulation result. But after P4 was made, no performance benefit
1241 was observed with branch hints. It also increases the code size.
1242 As a result, icc never generates branch hints. */
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 register stalls on Generic32 compilation setting as well. However
1259 in current implementation the partial register stalls are not eliminated
1260 very well - they can be introduced via subregs synthesized by combine
1261 and can happen in caller/callee saving sequences. Because this option
1262 pays back little on PPro based chips and is in conflict with partial reg
1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 for generic32 for now. */
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1276 /* X86_TUNE_USE_MOV0 */
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1291 /* X86_TUNE_READ_MODIFY */
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1304 /* X86_TUNE_QIMODE_MATH */
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 conflict here between PPro/Pentium4 based chips that treat 128bit
1343 SSE registers as single units versus K8 based chips that divide SSE
1344 registers to two 64bit halves. This knob promotes all store destinations
1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 results in one extra microop on 64bit SSE units. Experimental results
1347 shows that disabling this option on P4 brings over 20% SPECfp regression,
1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 masked by careful scheduling of moves. */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 are resolved on SSE register parts instead of whole registers, so we may
1357 maintain just lower part of scalar values in proper format leaving the
1358 upper part undefined. */
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1376 /* X86_TUNE_SHIFT1 */
1379 /* X86_TUNE_USE_FFREEP */
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD machines. */
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR with -1. */
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1434 operand that cannot be represented using a modRM byte. The XOR
1435 replacement is long decoded, so this split helps here as well. */
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1440 m_AMDFAM10 | m_GENERIC,
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1452 /* Feature tests against the various architecture variations. */
/* Per-architecture analogue of ix86_tune_features, indexed by
   X86_ARCH_*; created from initial_ix86_arch_features below.  */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which accumulating outgoing arguments is preferable.
   NOTE(review): the consumers of this mask are outside this chunk --
   confirm against the option-override code.  */
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* CPUs where the 80387 transcendental instructions are assumed usable;
   TODO(review): confirm exact semantics at the mask's use site.  */
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Selected string-operation algorithm; defaults to no_stringop
   (presumably overridden by a command-line option -- confirm).  */
1481 static enum stringop_alg stringop_alg = no_stringop;
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and epilogue.  */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1523 /* The "default" register map used in 32bit mode. */
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1536 /* The "default" register map used in 64bit mode. */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)  */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
/* Operands of the pending comparison; ix86_compare_emitted appears to
   hold an already-emitted comparison result -- confirm at use sites.  */
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1621 /* Define parameter passing and return registers. */
1623 static int const x86_64_int_parameter_registers[6] =
1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1628 static int const x86_64_ms_abi_int_parameter_registers[4] =
1630 CX_REG, DX_REG, R8_REG, R9_REG
1633 static int const x86_64_int_return_registers[4] =
1635 AX_REG, DX_REG, DI_REG, SI_REG
1638 /* Define the structure for the machine field in struct function. */
1640 struct stack_local_entry GTY(())
1642 unsigned short mode;
1645 struct stack_local_entry *next;
1648 /* Structure describing stack frame layout.
1649 Stack grows downward:
1655 saved frame pointer if frame_pointer_needed
1656 <- HARD_FRAME_POINTER
1665 [va_arg registers] (
1666 > to_allocate <- FRAME_POINTER  */
1678 HOST_WIDE_INT frame;
1680 int outgoing_arguments_size;
1683 HOST_WIDE_INT to_allocate;
1684 /* The offsets relative to ARG_POINTER. */
1685 HOST_WIDE_INT frame_pointer_offset;
1686 HOST_WIDE_INT hard_frame_pointer_offset;
1687 HOST_WIDE_INT stack_pointer_offset;
1689 /* When save_regs_using_mov is set, emit prologue using
1690 move instead of push instructions. */
1691 bool save_regs_using_mov;
1694 /* Code model option. */
1695 enum cmodel ix86_cmodel;
/* Assembler syntax in use; AT&T by default.  */
1697 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access model in use; GNU dialect by default.  */
1699 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1701 /* Which unit we are generating floating point math for. */
1702 enum fpmath_unit ix86_fpmath;
1704 /* Which cpu are we scheduling for. */
1705 enum attr_cpu ix86_schedule;
1707 /* Which cpu are we optimizing for. */
1708 enum processor_type ix86_tune;
1710 /* Which instruction set architecture to use. */
1711 enum processor_type ix86_arch;
1713 /* true if sse prefetch instruction is not NOOP. */
1714 int x86_prefetch_sse;
1716 /* ix86_regparm_string as a number */
1717 static int ix86_regparm;
1719 /* -mstackrealign option */
1720 extern int ix86_force_align_arg_pointer;
/* Attribute name string for force_align_arg_pointer.  */
1721 static const char ix86_force_align_arg_pointer_string[]
1722 = "force_align_arg_pointer";
/* Insn-generator hooks, presumably pointed at the 32-bit or 64-bit
   pattern generators once the target word size is known -- confirm
   where these are assigned.  */
1724 static rtx (*ix86_gen_leave) (void);
1725 static rtx (*ix86_gen_pop1) (rtx);
1726 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1727 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1728 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1729 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1730 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1731 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1733 /* Preferred alignment for stack boundary in bits. */
1734 unsigned int ix86_preferred_stack_boundary;
1736 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
1738 static unsigned int ix86_user_incoming_stack_boundary;
1740 /* Default alignment for incoming stack boundary in bits. */
1741 static unsigned int ix86_default_incoming_stack_boundary;
1743 /* Alignment for incoming stack boundary in bits. */
1744 unsigned int ix86_incoming_stack_boundary;
1746 /* The abi used by target. */
1747 enum calling_abi ix86_abi;
1749 /* Values 1-5: see jump.c */
1750 int ix86_branch_cost;
1752 /* Calling abi specific va_list type nodes. */
1753 static GTY(()) tree sysv_va_list_type_node;
1754 static GTY(()) tree ms_va_list_type_node;
1756 /* Variables which are this size or smaller are put in the data/bss
1757 or ldata/lbss sections. */
1759 int ix86_section_threshold = 65536;
1761 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1762 char internal_label_prefix[16];
1763 int internal_label_prefix_len;
1765 /* Fence to use after loop using movnt. */
1768 /* Register class used for passing given 64bit part of the argument.
1769 These represent classes as documented by the PS ABI, with the exception
1770 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1771 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1773 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1774 whenever possible (upper half does contain padding). */
1775 enum x86_64_reg_class
1778 X86_64_INTEGER_CLASS,
1779 X86_64_INTEGERSI_CLASS,
1786 X86_64_COMPLEX_X87_CLASS,
1790 #define MAX_CLASSES 4
1792 /* Table of constants used by fldpi, fldln2, etc.... */
1793 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1794 static bool ext_80387_constants_init = 0;
1797 static struct machine_function * ix86_init_machine_status (void);
1798 static rtx ix86_function_value (const_tree, const_tree, bool);
1799 static int ix86_function_regparm (const_tree, const_tree);
1800 static void ix86_compute_frame_layout (struct ix86_frame *);
1801 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1803 static void ix86_add_new_builtins (int);
1805 enum ix86_function_specific_strings
1807 IX86_FUNCTION_SPECIFIC_ARCH,
1808 IX86_FUNCTION_SPECIFIC_TUNE,
1809 IX86_FUNCTION_SPECIFIC_FPMATH,
1810 IX86_FUNCTION_SPECIFIC_MAX
1813 static char *ix86_target_string (int, int, const char *, const char *,
1814 const char *, bool);
1815 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1816 static void ix86_function_specific_save (struct cl_target_option *);
1817 static void ix86_function_specific_restore (struct cl_target_option *);
1818 static void ix86_function_specific_print (FILE *, int,
1819 struct cl_target_option *);
1820 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1821 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1822 static bool ix86_can_inline_p (tree, tree);
1823 static void ix86_set_current_function (tree);
1825 static enum calling_abi ix86_function_abi (const_tree);
1828 /* The svr4 ABI for the i386 says that records and unions are returned
1830 #ifndef DEFAULT_PCC_STRUCT_RETURN
1831 #define DEFAULT_PCC_STRUCT_RETURN 1
1834 /* Whether -mtune= or -march= were specified */
1835 static int ix86_tune_defaulted;
1836 static int ix86_arch_specified;
1838 /* Bit flags that specify the ISA we are compiling for. */
1839 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1841 /* A mask of ix86_isa_flags that includes bit X if X
1842 was set or cleared on the command line. */
1843 static int ix86_isa_flags_explicit;
1845 /* Define a set of ISAs which are available when a given ISA is
1846 enabled. MMX and SSE ISAs are handled separately. */
1848 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1849 #define OPTION_MASK_ISA_3DNOW_SET \
1850 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1852 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1853 #define OPTION_MASK_ISA_SSE2_SET \
1854 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1855 #define OPTION_MASK_ISA_SSE3_SET \
1856 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1857 #define OPTION_MASK_ISA_SSSE3_SET \
1858 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1859 #define OPTION_MASK_ISA_SSE4_1_SET \
1860 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1861 #define OPTION_MASK_ISA_SSE4_2_SET \
1862 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1863 #define OPTION_MASK_ISA_AVX_SET \
1864 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1865 #define OPTION_MASK_ISA_FMA_SET \
1866 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1868 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1870 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1872 #define OPTION_MASK_ISA_SSE4A_SET \
1873 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1874 #define OPTION_MASK_ISA_SSE5_SET \
1875 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1877 /* AES and PCLMUL need SSE2 because they use xmm registers */
1878 #define OPTION_MASK_ISA_AES_SET \
1879 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1880 #define OPTION_MASK_ISA_PCLMUL_SET \
1881 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1883 #define OPTION_MASK_ISA_ABM_SET \
1884 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT_SET) /* ABM implies POPCNT; use the _SET form like every other implied-ISA macro so future POPCNT implications propagate. */
1885 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1886 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1887 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1889 /* Define a set of ISAs which aren't available when a given ISA is
1890 disabled. MMX and SSE ISAs are handled separately. */
1892 #define OPTION_MASK_ISA_MMX_UNSET \
1893 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1894 #define OPTION_MASK_ISA_3DNOW_UNSET \
1895 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1896 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1898 #define OPTION_MASK_ISA_SSE_UNSET \
1899 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1900 #define OPTION_MASK_ISA_SSE2_UNSET \
1901 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1902 #define OPTION_MASK_ISA_SSE3_UNSET \
1903 (OPTION_MASK_ISA_SSE3 \
1904 | OPTION_MASK_ISA_SSSE3_UNSET \
1905 | OPTION_MASK_ISA_SSE4A_UNSET )
1906 #define OPTION_MASK_ISA_SSSE3_UNSET \
1907 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1908 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1909 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1910 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1911 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1912 #define OPTION_MASK_ISA_AVX_UNSET \
1913 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1914 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1916 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
1918 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1920 #define OPTION_MASK_ISA_SSE4A_UNSET \
1921 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1922 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1923 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1924 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1925 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1926 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1927 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1928 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1930 /* Vectorization library interface and handlers. */
1931 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1932 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1933 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1935 /* Processor target table, indexed by processor number */
1938 const struct processor_costs *cost; /* Processor costs */
1939 const int align_loop; /* Default alignments. */
1940 const int align_loop_max_skip;
1941 const int align_jump;
1942 const int align_jump_max_skip;
1943 const int align_func;
1946 static const struct ptt processor_target_table[PROCESSOR_max] =
1948 {&i386_cost, 4, 3, 4, 3, 4},
1949 {&i486_cost, 16, 15, 16, 15, 16},
1950 {&pentium_cost, 16, 7, 16, 7, 16},
1951 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1952 {&geode_cost, 0, 0, 0, 0, 0},
1953 {&k6_cost, 32, 7, 32, 7, 32},
1954 {&athlon_cost, 16, 7, 16, 7, 16},
1955 {&pentium4_cost, 0, 0, 0, 0, 0},
1956 {&k8_cost, 16, 7, 16, 7, 16},
1957 {&nocona_cost, 0, 0, 0, 0, 0},
1958 {&core2_cost, 16, 10, 16, 10, 16},
1959 {&generic32_cost, 16, 7, 16, 7, 16},
1960 {&generic64_cost, 16, 10, 16, 10, 16},
1961 {&amdfam10_cost, 32, 24, 32, 7, 32}
1964 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1989 /* Implement TARGET_HANDLE_OPTION. */
1992 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1999 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2000 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2004 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2012 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2013 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2017 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2028 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2033 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2041 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2046 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2054 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2059 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2067 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2072 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2080 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2081 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2085 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2093 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2094 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2098 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2106 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2107 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2111 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2119 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2124 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2130 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2131 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2135 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2168 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2181 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2194 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2207 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2220 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2233 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2248 /* Return a string the documents the current -m options. The caller is
2249 responsible for freeing the string. */
2252 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2253 const char *fpmath, bool add_nl_p)
2255 struct ix86_target_opts
2257 const char *option; /* option string */
2258 int mask; /* isa mask options */
2261 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2262 preceding options are matched first.  */
2263 static struct ix86_target_opts isa_opts[] =
2265 { "-m64", OPTION_MASK_ISA_64BIT },
2266 { "-msse5", OPTION_MASK_ISA_SSE5 },
2267 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2268 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2269 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2270 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2271 { "-msse3", OPTION_MASK_ISA_SSE3 },
2272 { "-msse2", OPTION_MASK_ISA_SSE2 },
2273 { "-msse", OPTION_MASK_ISA_SSE },
2274 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2275 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2276 { "-mmmx", OPTION_MASK_ISA_MMX },
2277 { "-mabm", OPTION_MASK_ISA_ABM },
2278 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2279 { "-maes", OPTION_MASK_ISA_AES },
2280 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2284 static struct ix86_target_opts flag_opts[] =
2286 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2287 { "-m80387", MASK_80387 },
2288 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2289 { "-malign-double", MASK_ALIGN_DOUBLE },
2290 { "-mcld", MASK_CLD },
2291 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2292 { "-mieee-fp", MASK_IEEE_FP },
2293 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2294 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2295 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2296 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2297 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2298 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2299 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2300 { "-mno-red-zone", MASK_NO_RED_ZONE },
2301 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2302 { "-mrecip", MASK_RECIP },
2303 { "-mrtd", MASK_RTD },
2304 { "-msseregparm", MASK_SSEREGPARM },
2305 { "-mstack-arg-probe", MASK_STACK_PROBE },
2306 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2309 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2312 char target_other[40];
2321 memset (opts, '\0', sizeof (opts));
2323 /* Add -march= option. */
2326 opts[num][0] = "-march=";
2327 opts[num++][1] = arch;
2330 /* Add -mtune= option. */
2333 opts[num][0] = "-mtune=";
2334 opts[num++][1] = tune;
2337 /* Pick out the options in isa options. */
2338 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2340 if ((isa & isa_opts[i].mask) != 0)
2342 opts[num++][0] = isa_opts[i].option;
2343 isa &= ~ isa_opts[i].mask;
2347 if (isa && add_nl_p)
2349 opts[num++][0] = isa_other;
2350 sprintf (isa_other, "(other isa: 0x%x)", isa);
2353 /* Add flag options. */
2354 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2356 if ((flags & flag_opts[i].mask) != 0)
2358 opts[num++][0] = flag_opts[i].option;
2359 flags &= ~ flag_opts[i].mask;
2363 if (flags && add_nl_p)
2364 {
2365 opts[num++][0] = target_other;
2366 sprintf (target_other, "(other flags: 0x%x)", flags); /* BUG FIX: printed 'isa' (copy-paste from the isa branch above); this message reports leftover target FLAGS, so print 'flags'. */
2367 }
2369 /* Add -fpmath= option. */
2372 opts[num][0] = "-mfpmath=";
2373 opts[num++][1] = fpmath;
2380 gcc_assert (num < ARRAY_SIZE (opts));
2382 /* Size the string. */
2384 sep_len = (add_nl_p) ? 3 : 1;
2385 for (i = 0; i < num; i++)
2388 for (j = 0; j < 2; j++)
2390 len += strlen (opts[i][j]);
2393 /* Build the string. */
2394 ret = ptr = (char *) xmalloc (len);
2397 for (i = 0; i < num; i++)
2401 for (j = 0; j < 2; j++)
2402 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2409 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2417 for (j = 0; j < 2; j++)
2420 memcpy (ptr, opts[i][j], len2[j]);
2422 line_len += len2[j];
2427 gcc_assert (ret + len >= ptr);
2432 /* Function that is callable from the debugger to print the current
2435 ix86_debug_options (void)
2437 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2438 ix86_arch_string, ix86_tune_string,
2439 ix86_fpmath_string, true);
2443 fprintf (stderr, "%s\n\n", opts);
2447 fprintf (stderr, "<no options>\n\n");
2452 /* Sometimes certain combinations of command options do not make
2453 sense on a particular target machine. You can define a macro
2454 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2455 defined, is executed once just after all the command options have
2458 Don't use this macro to turn on various extra optimizations for
2459 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2462 override_options (bool main_args_p)
2465 unsigned int ix86_arch_mask, ix86_tune_mask;
2470 /* Comes from final.c -- no real reason to change it. */
2471 #define MAX_CODE_ALIGN 16
2479 PTA_PREFETCH_SSE = 1 << 4,
2481 PTA_3DNOW_A = 1 << 6,
2485 PTA_POPCNT = 1 << 10,
2487 PTA_SSE4A = 1 << 12,
2488 PTA_NO_SAHF = 1 << 13,
2489 PTA_SSE4_1 = 1 << 14,
2490 PTA_SSE4_2 = 1 << 15,
2493 PTA_PCLMUL = 1 << 18,
2500 const char *const name; /* processor name or nickname. */
2501 const enum processor_type processor;
2502 const enum attr_cpu schedule;
2503 const unsigned /*enum pta_flags*/ flags;
2505 const processor_alias_table[] =
2507 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2508 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2509 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2510 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2511 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2512 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2513 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2514 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2515 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2516 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2517 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2518 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2519 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2521 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2523 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2524 PTA_MMX | PTA_SSE | PTA_SSE2},
2525 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2526 PTA_MMX |PTA_SSE | PTA_SSE2},
2527 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2528 PTA_MMX | PTA_SSE | PTA_SSE2},
2529 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2530 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2531 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2532 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2533 | PTA_CX16 | PTA_NO_SAHF},
2534 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2535 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2536 | PTA_SSSE3 | PTA_CX16},
2537 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2538 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2539 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2540 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2541 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2542 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2545 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2546 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2547 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2548 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2549 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2550 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2551 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2552 {"x86-64", PROCESSOR_K8, CPU_K8,
2553 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2554 {"k8", PROCESSOR_K8, CPU_K8,
2555 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2556 | PTA_SSE2 | PTA_NO_SAHF},
2557 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2558 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2559 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2560 {"opteron", PROCESSOR_K8, CPU_K8,
2561 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2562 | PTA_SSE2 | PTA_NO_SAHF},
2563 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2564 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2565 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2566 {"athlon64", PROCESSOR_K8, CPU_K8,
2567 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2568 | PTA_SSE2 | PTA_NO_SAHF},
2569 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2570 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2571 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2572 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2573 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2574 | PTA_SSE2 | PTA_NO_SAHF},
2575 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2576 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2577 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2578 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2579 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2580 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2581 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2582 0 /* flags are only used for -march switch. */ },
2583 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2584 PTA_64BIT /* flags are only used for -march switch. */ },
2587 int const pta_size = ARRAY_SIZE (processor_alias_table);
2589 /* Set up prefix/suffix so the error messages refer to either the command
2590 line argument, or the attribute(target). */
2599 prefix = "option(\"";
2604 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2605 SUBTARGET_OVERRIDE_OPTIONS;
2608 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2609 SUBSUBTARGET_OVERRIDE_OPTIONS;
2612 /* -fPIC is the default for x86_64. */
2613 if (TARGET_MACHO && TARGET_64BIT)
2616 /* Set the default values for switches whose default depends on TARGET_64BIT
2617 in case they weren't overwritten by command line options. */
2620 /* Mach-O doesn't support omitting the frame pointer for now. */
2621 if (flag_omit_frame_pointer == 2)
2622 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2623 if (flag_asynchronous_unwind_tables == 2)
2624 flag_asynchronous_unwind_tables = 1;
2625 if (flag_pcc_struct_return == 2)
2626 flag_pcc_struct_return = 0;
2630 if (flag_omit_frame_pointer == 2)
2631 flag_omit_frame_pointer = 0;
2632 if (flag_asynchronous_unwind_tables == 2)
2633 flag_asynchronous_unwind_tables = 0;
2634 if (flag_pcc_struct_return == 2)
2635 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2638 /* Need to check -mtune=generic first. */
2639 if (ix86_tune_string)
2641 if (!strcmp (ix86_tune_string, "generic")
2642 || !strcmp (ix86_tune_string, "i686")
2643 /* As special support for cross compilers we read -mtune=native
2644 as -mtune=generic. With native compilers we won't see the
2645 -mtune=native, as it was changed by the driver. */
2646 || !strcmp (ix86_tune_string, "native"))
2649 ix86_tune_string = "generic64";
2651 ix86_tune_string = "generic32";
2653 /* If this call is for setting the option attribute, allow the
2654 generic32/generic64 that was previously set. */
2655 else if (!main_args_p
2656 && (!strcmp (ix86_tune_string, "generic32")
2657 || !strcmp (ix86_tune_string, "generic64")))
2659 else if (!strncmp (ix86_tune_string, "generic", 7))
2660 error ("bad value (%s) for %stune=%s %s",
2661 ix86_tune_string, prefix, suffix, sw);
2665 if (ix86_arch_string)
2666 ix86_tune_string = ix86_arch_string;
2667 if (!ix86_tune_string)
2669 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2670 ix86_tune_defaulted = 1;
2673 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2674 need to use a sensible tune option. */
2675 if (!strcmp (ix86_tune_string, "generic")
2676 || !strcmp (ix86_tune_string, "x86-64")
2677 || !strcmp (ix86_tune_string, "i686"))
2680 ix86_tune_string = "generic64";
2682 ix86_tune_string = "generic32";
2685 if (ix86_stringop_string)
2687 if (!strcmp (ix86_stringop_string, "rep_byte"))
2688 stringop_alg = rep_prefix_1_byte;
2689 else if (!strcmp (ix86_stringop_string, "libcall"))
2690 stringop_alg = libcall;
2691 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2692 stringop_alg = rep_prefix_4_byte;
2693 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2695 /* rep; movq isn't available in 32-bit code. */
2696 stringop_alg = rep_prefix_8_byte;
2697 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2698 stringop_alg = loop_1_byte;
2699 else if (!strcmp (ix86_stringop_string, "loop"))
2700 stringop_alg = loop;
2701 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2702 stringop_alg = unrolled_loop;
2704 error ("bad value (%s) for %sstringop-strategy=%s %s",
2705 ix86_stringop_string, prefix, suffix, sw);
2707 if (!strcmp (ix86_tune_string, "x86-64"))
2708 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2709 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2710 prefix, suffix, prefix, suffix, prefix, suffix);
2712 if (!ix86_arch_string)
2713 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2715 ix86_arch_specified = 1;
2717 if (!strcmp (ix86_arch_string, "generic"))
2718 error ("generic CPU can be used only for %stune=%s %s",
2719 prefix, suffix, sw);
2720 if (!strncmp (ix86_arch_string, "generic", 7))
2721 error ("bad value (%s) for %sarch=%s %s",
2722 ix86_arch_string, prefix, suffix, sw);
2724 /* Validate -mabi= value. */
2725 if (ix86_abi_string)
2727 if (strcmp (ix86_abi_string, "sysv") == 0)
2728 ix86_abi = SYSV_ABI;
2729 else if (strcmp (ix86_abi_string, "ms") == 0)
2732 error ("unknown ABI (%s) for %sabi=%s %s",
2733 ix86_abi_string, prefix, suffix, sw);
2736 ix86_abi = DEFAULT_ABI;
2738 if (ix86_cmodel_string != 0)
2740 if (!strcmp (ix86_cmodel_string, "small"))
2741 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2742 else if (!strcmp (ix86_cmodel_string, "medium"))
2743 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2744 else if (!strcmp (ix86_cmodel_string, "large"))
2745 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2747 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2748 else if (!strcmp (ix86_cmodel_string, "32"))
2749 ix86_cmodel = CM_32;
2750 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2751 ix86_cmodel = CM_KERNEL;
2753 error ("bad value (%s) for %scmodel=%s %s",
2754 ix86_cmodel_string, prefix, suffix, sw);
2758 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2759 use of rip-relative addressing. This eliminates fixups that
2760 would otherwise be needed if this object is to be placed in a
2761 DLL, and is essentially just as efficient as direct addressing. */
2762 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2763 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2764 else if (TARGET_64BIT)
2765 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2767 ix86_cmodel = CM_32;
2769 if (ix86_asm_string != 0)
2772 && !strcmp (ix86_asm_string, "intel"))
2773 ix86_asm_dialect = ASM_INTEL;
2774 else if (!strcmp (ix86_asm_string, "att"))
2775 ix86_asm_dialect = ASM_ATT;
2777 error ("bad value (%s) for %sasm=%s %s",
2778 ix86_asm_string, prefix, suffix, sw);
2780 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2781 error ("code model %qs not supported in the %s bit mode",
2782 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2783 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2784 sorry ("%i-bit mode not compiled in",
2785 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2787 for (i = 0; i < pta_size; i++)
2788 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2790 ix86_schedule = processor_alias_table[i].schedule;
2791 ix86_arch = processor_alias_table[i].processor;
2792 /* Default cpu tuning to the architecture. */
2793 ix86_tune = ix86_arch;
2795 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2796 error ("CPU you selected does not support x86-64 "
2799 if (processor_alias_table[i].flags & PTA_MMX
2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2801 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2802 if (processor_alias_table[i].flags & PTA_3DNOW
2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2804 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2805 if (processor_alias_table[i].flags & PTA_3DNOW_A
2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2807 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2808 if (processor_alias_table[i].flags & PTA_SSE
2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2810 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2811 if (processor_alias_table[i].flags & PTA_SSE2
2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2814 if (processor_alias_table[i].flags & PTA_SSE3
2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2817 if (processor_alias_table[i].flags & PTA_SSSE3
2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2819 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2820 if (processor_alias_table[i].flags & PTA_SSE4_1
2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2822 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2823 if (processor_alias_table[i].flags & PTA_SSE4_2
2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2825 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2826 if (processor_alias_table[i].flags & PTA_AVX
2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2828 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2829 if (processor_alias_table[i].flags & PTA_FMA
2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2831 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2832 if (processor_alias_table[i].flags & PTA_SSE4A
2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2834 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2835 if (processor_alias_table[i].flags & PTA_SSE5
2836 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2837 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2838 if (processor_alias_table[i].flags & PTA_ABM
2839 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2840 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2841 if (processor_alias_table[i].flags & PTA_CX16
2842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2843 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2844 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2846 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2847 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2848 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2849 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2850 if (processor_alias_table[i].flags & PTA_AES
2851 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2852 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2853 if (processor_alias_table[i].flags & PTA_PCLMUL
2854 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2855 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2856 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2857 x86_prefetch_sse = true;
2863 error ("bad value (%s) for %sarch=%s %s",
2864 ix86_arch_string, prefix, suffix, sw);
2866 ix86_arch_mask = 1u << ix86_arch;
2867 for (i = 0; i < X86_ARCH_LAST; ++i)
2868 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2870 for (i = 0; i < pta_size; i++)
2871 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2873 ix86_schedule = processor_alias_table[i].schedule;
2874 ix86_tune = processor_alias_table[i].processor;
2875 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2877 if (ix86_tune_defaulted)
2879 ix86_tune_string = "x86-64";
2880 for (i = 0; i < pta_size; i++)
2881 if (! strcmp (ix86_tune_string,
2882 processor_alias_table[i].name))
2884 ix86_schedule = processor_alias_table[i].schedule;
2885 ix86_tune = processor_alias_table[i].processor;
2888 error ("CPU you selected does not support x86-64 "
2891 /* Intel CPUs have always interpreted SSE prefetch instructions as
2892 NOPs; so, we can enable SSE prefetch instructions even when
2893 -mtune (rather than -march) points us to a processor that has them.
2894 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2895 higher processors. */
2897 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2898 x86_prefetch_sse = true;
2902 error ("bad value (%s) for %stune=%s %s",
2903 ix86_tune_string, prefix, suffix, sw);
2905 ix86_tune_mask = 1u << ix86_tune;
2906 for (i = 0; i < X86_TUNE_LAST; ++i)
2907 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2910 ix86_cost = &ix86_size_cost;
2912 ix86_cost = processor_target_table[ix86_tune].cost;
2914 /* Arrange to set up i386_stack_locals for all functions. */
2915 init_machine_status = ix86_init_machine_status;
2917 /* Validate -mregparm= value. */
2918 if (ix86_regparm_string)
2921 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2922 i = atoi (ix86_regparm_string);
2923 if (i < 0 || i > REGPARM_MAX)
2924 error ("%sregparm=%d%s is not between 0 and %d",
2925 prefix, i, suffix, REGPARM_MAX);
2930 ix86_regparm = REGPARM_MAX;
2932 /* If the user has provided any of the -malign-* options,
2933 warn and use that value only if -falign-* is not set.
2934 Remove this code in GCC 3.2 or later. */
2935 if (ix86_align_loops_string)
2937 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2938 prefix, suffix, suffix);
2939 if (align_loops == 0)
2941 i = atoi (ix86_align_loops_string);
2942 if (i < 0 || i > MAX_CODE_ALIGN)
2943 error ("%salign-loops=%d%s is not between 0 and %d",
2944 prefix, i, suffix, MAX_CODE_ALIGN);
2946 align_loops = 1 << i;
2950 if (ix86_align_jumps_string)
2952 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2953 prefix, suffix, suffix);
2954 if (align_jumps == 0)
2956 i = atoi (ix86_align_jumps_string);
2957 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed: this diagnostic previously said "align-loops" (copy-paste from
   the -malign-loops handling above) even though it validates
   -malign-jumps.  */
2958 error ("%salign-jumps=%d%s is not between 0 and %d",
2959 prefix, i, suffix, MAX_CODE_ALIGN);
2961 align_jumps = 1 << i;
2965 if (ix86_align_funcs_string)
2967 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2968 prefix, suffix, suffix);
2969 if (align_functions == 0)
2971 i = atoi (ix86_align_funcs_string);
2972 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed: this diagnostic previously said "align-loops" (copy-paste from
   the -malign-loops handling above) even though it validates
   -malign-functions.  */
2973 error ("%salign-functions=%d%s is not between 0 and %d",
2974 prefix, i, suffix, MAX_CODE_ALIGN);
2976 align_functions = 1 << i;
2980 /* Default align_* from the processor table. */
2981 if (align_loops == 0)
2983 align_loops = processor_target_table[ix86_tune].align_loop;
2984 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2986 if (align_jumps == 0)
2988 align_jumps = processor_target_table[ix86_tune].align_jump;
2989 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2991 if (align_functions == 0)
2993 align_functions = processor_target_table[ix86_tune].align_func;
2996 /* Validate -mbranch-cost= value, or provide default. */
2997 ix86_branch_cost = ix86_cost->branch_cost;
2998 if (ix86_branch_cost_string)
3000 i = atoi (ix86_branch_cost_string);
3002 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3004 ix86_branch_cost = i;
3006 if (ix86_section_threshold_string)
3008 i = atoi (ix86_section_threshold_string);
3010 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3012 ix86_section_threshold = i;
3015 if (ix86_tls_dialect_string)
3017 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3018 ix86_tls_dialect = TLS_DIALECT_GNU;
3019 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3020 ix86_tls_dialect = TLS_DIALECT_GNU2;
3021 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3022 ix86_tls_dialect = TLS_DIALECT_SUN;
3024 error ("bad value (%s) for %stls-dialect=%s %s",
3025 ix86_tls_dialect_string, prefix, suffix, sw);
3028 if (ix87_precision_string)
3030 i = atoi (ix87_precision_string);
3031 if (i != 32 && i != 64 && i != 80)
3032 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3037 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3039 /* Enable by default the SSE and MMX builtins. Do allow the user to
3040 explicitly disable any of these. In particular, disabling SSE and
3041 MMX for kernel code is extremely useful. */
3042 if (!ix86_arch_specified)
3044 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3045 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3048 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3052 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3054 if (!ix86_arch_specified)
3056 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3058 /* i386 ABI does not specify red zone. It still makes sense to use it
3059 when programmer takes care to stack from being destroyed. */
3060 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3061 target_flags |= MASK_NO_RED_ZONE;
3064 /* Keep nonleaf frame pointers. */
3065 if (flag_omit_frame_pointer)
3066 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3067 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3068 flag_omit_frame_pointer = 1;
3070 /* If we're doing fast math, we don't care about comparison order
3071 wrt NaNs. This lets us use a shorter comparison sequence. */
3072 if (flag_finite_math_only)
3073 target_flags &= ~MASK_IEEE_FP;
3075 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3076 since the insns won't need emulation. */
3077 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3078 target_flags &= ~MASK_NO_FANCY_MATH_387;
3080 /* Likewise, if the target doesn't have a 387, or we've specified
3081 software floating point, don't use 387 inline intrinsics. */
3083 target_flags |= MASK_NO_FANCY_MATH_387;
3085 /* Turn on MMX builtins for -msse. */
3088 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3089 x86_prefetch_sse = true;
3092 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3093 if (TARGET_SSE4_2 || TARGET_ABM)
3094 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3096 /* Validate -mpreferred-stack-boundary= value or default it to
3097 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3098 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3099 if (ix86_preferred_stack_boundary_string)
3101 i = atoi (ix86_preferred_stack_boundary_string);
3102 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3103 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3104 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3106 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3109 /* Set the default value for -mstackrealign. */
3110 if (ix86_force_align_arg_pointer == -1)
3111 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3113 /* Validate -mincoming-stack-boundary= value or default it to
3114 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3115 if (ix86_force_align_arg_pointer)
3116 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3118 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3119 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3120 if (ix86_incoming_stack_boundary_string)
3122 i = atoi (ix86_incoming_stack_boundary_string);
3123 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3124 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3125 i, TARGET_64BIT ? 4 : 2);
3128 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3129 ix86_incoming_stack_boundary
3130 = ix86_user_incoming_stack_boundary;
3134 /* Accept -msseregparm only if at least SSE support is enabled. */
3135 if (TARGET_SSEREGPARM
3137 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3139 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3140 if (ix86_fpmath_string != 0)
3142 if (! strcmp (ix86_fpmath_string, "387"))
3143 ix86_fpmath = FPMATH_387;
3144 else if (! strcmp (ix86_fpmath_string, "sse"))
3148 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3149 ix86_fpmath = FPMATH_387;
3152 ix86_fpmath = FPMATH_SSE;
3154 else if (! strcmp (ix86_fpmath_string, "387,sse")
3155 || ! strcmp (ix86_fpmath_string, "387+sse")
3156 || ! strcmp (ix86_fpmath_string, "sse,387")
3157 || ! strcmp (ix86_fpmath_string, "sse+387")
3158 || ! strcmp (ix86_fpmath_string, "both"))
3162 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3163 ix86_fpmath = FPMATH_387;
3165 else if (!TARGET_80387)
3167 warning (0, "387 instruction set disabled, using SSE arithmetics");
3168 ix86_fpmath = FPMATH_SSE;
3171 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3174 error ("bad value (%s) for %sfpmath=%s %s",
3175 ix86_fpmath_string, prefix, suffix, sw);
3178 /* If the i387 is disabled, then do not return values in it. */
3180 target_flags &= ~MASK_FLOAT_RETURNS;
3182 /* Use external vectorized library in vectorizing intrinsics. */
3183 if (ix86_veclibabi_string)
3185 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3186 ix86_veclib_handler = ix86_veclibabi_svml;
3187 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3188 ix86_veclib_handler = ix86_veclibabi_acml;
3190 error ("unknown vectorization library ABI type (%s) for "
3191 "%sveclibabi=%s %s", ix86_veclibabi_string,
3192 prefix, suffix, sw);
3195 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3196 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3198 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3200 /* ??? Unwind info is not correct around the CFG unless either a frame
3201 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3202 unwind info generation to be aware of the CFG and propagating states
3204 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3205 || flag_exceptions || flag_non_call_exceptions)
3206 && flag_omit_frame_pointer
3207 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3209 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3210 warning (0, "unwind tables currently require either a frame pointer "
3211 "or %saccumulate-outgoing-args%s for correctness",
3213 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3216 /* If stack probes are required, the space used for large function
3217 arguments on the stack must also be probed, so enable
3218 -maccumulate-outgoing-args so this happens in the prologue. */
3219 if (TARGET_STACK_PROBE
3220 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3222 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3223 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3224 "for correctness", prefix, suffix);
3225 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3228 /* For sane SSE instruction set generation we need fcomi instruction.
3229 It is safe to enable all CMOVE instructions. */
3233 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3236 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3237 p = strchr (internal_label_prefix, 'X');
3238 internal_label_prefix_len = p - internal_label_prefix;
3242 /* When scheduling description is not available, disable scheduler pass
3243 so it won't slow down the compilation and make x87 code slower. */
3244 if (!TARGET_SCHEDULE)
3245 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3247 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3248 set_param_value ("simultaneous-prefetches",
3249 ix86_cost->simultaneous_prefetches);
3250 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3251 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3252 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3253 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3254 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3255 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3257 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3258 can be optimized to ap = __builtin_next_arg (0). */
3260 targetm.expand_builtin_va_start = NULL;
3264 ix86_gen_leave = gen_leave_rex64;
3265 ix86_gen_pop1 = gen_popdi1;
3266 ix86_gen_add3 = gen_adddi3;
3267 ix86_gen_sub3 = gen_subdi3;
3268 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3269 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3270 ix86_gen_monitor = gen_sse3_monitor64;
3271 ix86_gen_andsp = gen_anddi3;
3275 ix86_gen_leave = gen_leave;
3276 ix86_gen_pop1 = gen_popsi1;
3277 ix86_gen_add3 = gen_addsi3;
3278 ix86_gen_sub3 = gen_subsi3;
3279 ix86_gen_sub3_carry = gen_subsi3_carry;
3280 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3281 ix86_gen_monitor = gen_sse3_monitor;
3282 ix86_gen_andsp = gen_andsi3;
3286 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3288 target_flags |= MASK_CLD & ~target_flags_explicit;
3291 /* Save the initial options in case the user does function specific options */
3293 target_option_default_node = target_option_current_node
3294 = build_target_option_node ();
3297 /* Save the current options */
/* Copy the current i386 target-option globals into *PTR so they can
   later be reinstated by ix86_function_specific_restore.  Part of the
   attribute((target(...))) save/restore machinery.  */
3300 ix86_function_specific_save (struct cl_target_option *ptr)
/* The cl_target_option fields holding these values are byte-sized;
   assert the current values still fit before narrowing them.  */
3302 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3303 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3304 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3305 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3306 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3308 ptr->arch = ix86_arch;
3309 ptr->schedule = ix86_schedule;
3310 ptr->tune = ix86_tune;
3311 ptr->fpmath = ix86_fpmath;
3312 ptr->branch_cost = ix86_branch_cost;
3313 ptr->tune_defaulted = ix86_tune_defaulted;
3314 ptr->arch_specified = ix86_arch_specified;
3315 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3316 ptr->target_flags_explicit = target_flags_explicit;
3319 /* Restore the current options */
/* Reinstate the i386 target-option globals from *PTR (the inverse of
   ix86_function_specific_save).  If the arch or tune CPU changed, the
   cached per-feature bitmask tables are rebuilt below.  */
3322 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous values so we only recompute the feature tables
   when they actually change.  */
3324 enum processor_type old_tune = ix86_tune;
3325 enum processor_type old_arch = ix86_arch;
3326 unsigned int ix86_arch_mask, ix86_tune_mask;
3329 ix86_arch = ptr->arch;
3330 ix86_schedule = ptr->schedule;
3331 ix86_tune = ptr->tune;
3332 ix86_fpmath = ptr->fpmath;
3333 ix86_branch_cost = ptr->branch_cost;
3334 ix86_tune_defaulted = ptr->tune_defaulted;
3335 ix86_arch_specified = ptr->arch_specified;
3336 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3337 target_flags_explicit = ptr->target_flags_explicit;
3339 /* Recreate the arch feature tests if the arch changed */
3340 if (old_arch != ix86_arch)
3342 ix86_arch_mask = 1u << ix86_arch;
3343 for (i = 0; i < X86_ARCH_LAST; ++i)
3344 ix86_arch_features[i]
3345 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3348 /* Recreate the tune optimization tests */
3349 if (old_tune != ix86_tune)
3351 ix86_tune_mask = 1u << ix86_tune;
3352 for (i = 0; i < X86_TUNE_LAST; ++i)
3353 ix86_tune_features[i]
3354 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3358 /* Print the current options */
/* Dump the saved target options in *PTR to FILE at INDENT columns, for
   debugging the function-specific option machinery.  */
3361 ix86_function_specific_print (FILE *file, int indent,
3362 struct cl_target_option *ptr)
/* Render the ISA and target flags as a human-readable option string.  */
3365 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3366 NULL, NULL, NULL, false);
3368 fprintf (file, "%*sarch = %d (%s)\n",
/* Only index cpu_names when the arch index is within the table.  */
3371 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3372 ? cpu_names[ptr->arch]
3375 fprintf (file, "%*stune = %d (%s)\n",
3378 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3379 ? cpu_names[ptr->tune]
/* fpmath is a mask; 387 and SSE can both be set.  */
3382 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3383 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3384 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3385 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3389 fprintf (file, "%*s%s\n", indent, "", target_string);
/* Presumably allocated by ix86_target_string — released here.  */
3390 free (target_string);
3395 /* Inner function to process the attribute((target(...))), take an argument and
3396 set the current options from the argument. If we have a list, recursively go
/* Returns false on any invalid option.  P_STRINGS collects the string
   arguments (arch=/tune=/fpmath=) for the caller to apply and free.  */
3400 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: S is the attribute name, O the corresponding
   command-line option enum; M (where present) the target_flags mask.  */
3405 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3406 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3407 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3408 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3423 enum ix86_opt_type type;
/* Table of all attribute names recognized inside target("...").  */
3428 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3429 IX86_ATTR_ISA ("abm", OPT_mabm),
3430 IX86_ATTR_ISA ("aes", OPT_maes),
3431 IX86_ATTR_ISA ("avx", OPT_mavx),
3432 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3433 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3434 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3435 IX86_ATTR_ISA ("sse", OPT_msse),
3436 IX86_ATTR_ISA ("sse2", OPT_msse2),
3437 IX86_ATTR_ISA ("sse3", OPT_msse3),
3438 IX86_ATTR_ISA ("sse4", OPT_msse4),
3439 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3440 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3441 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3442 IX86_ATTR_ISA ("sse5", OPT_msse5),
3443 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3445 /* string options */
3446 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3447 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3448 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3451 IX86_ATTR_YES ("cld",
3455 IX86_ATTR_NO ("fancy-math-387",
3456 OPT_mfancy_math_387,
3457 MASK_NO_FANCY_MATH_387),
3459 IX86_ATTR_NO ("fused-madd",
3461 MASK_NO_FUSED_MADD),
3463 IX86_ATTR_YES ("ieee-fp",
3467 IX86_ATTR_YES ("inline-all-stringops",
3468 OPT_minline_all_stringops,
3469 MASK_INLINE_ALL_STRINGOPS),
3471 IX86_ATTR_YES ("inline-stringops-dynamically",
3472 OPT_minline_stringops_dynamically,
3473 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3475 IX86_ATTR_NO ("align-stringops",
3476 OPT_mno_align_stringops,
3477 MASK_NO_ALIGN_STRINGOPS),
3479 IX86_ATTR_YES ("recip",
3485 /* If this is a list, recurse to get the options. */
3486 if (TREE_CODE (args) == TREE_LIST)
3490 for (; args; args = TREE_CHAIN (args))
3491 if (TREE_VALUE (args)
3492 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3498 else if (TREE_CODE (args) != STRING_CST)
3501 /* Handle multiple arguments separated by commas. */
3502 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3504 while (next_optstr && *next_optstr != '\0')
3506 char *p = next_optstr;
3508 char *comma = strchr (next_optstr, ',');
3509 const char *opt_string;
3510 size_t len, opt_len;
3515 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the piece before the comma; continue after it next
   iteration.  */
3521 len = comma - next_optstr;
3522 next_optstr = comma + 1;
3530 /* Recognize no-xxx. */
3531 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3540 /* Find the option. */
3543 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3545 type = attrs[i].type;
3546 opt_len = attrs[i].len;
/* First-character check is a cheap pre-filter before memcmp; string
   options (arch= etc.) match as a prefix, others must match exactly.  */
3547 if (ch == attrs[i].string[0]
3548 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3549 && memcmp (p, attrs[i].string, opt_len) == 0)
3552 mask = attrs[i].mask;
3553 opt_string = attrs[i].string;
3558 /* Process the option. */
3561 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are routed through the normal -m option handler.  */
3565 else if (type == ix86_opt_isa)
3566 ix86_handle_option (opt, p, opt_set_p);
3568 else if (type == ix86_opt_yes || type == ix86_opt_no)
3570 if (type == ix86_opt_no)
3571 opt_set_p = !opt_set_p;
3574 target_flags |= mask;
3576 target_flags &= ~mask;
3579 else if (type == ix86_opt_str)
3583 error ("option(\"%s\") was already specified", opt_string);
/* Stash the string argument; the caller owns and frees it.  */
3587 p_strings[opt] = xstrdup (p + opt_len);
3597 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3600 ix86_valid_target_attribute_tree (tree args)
/* Remember the command-line option strings so they can be put back after
   override_options has been rerun with the attribute's values.  */
3602 const char *orig_arch_string = ix86_arch_string;
3603 const char *orig_tune_string = ix86_tune_string;
3604 const char *orig_fpmath_string = ix86_fpmath_string;
3605 int orig_tune_defaulted = ix86_tune_defaulted;
3606 int orig_arch_specified = ix86_arch_specified;
3607 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3610 struct cl_target_option *def
3611 = TREE_TARGET_OPTION (target_option_default_node);
3613 /* Process each of the options on the chain. */
3614 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3617 /* If the changed options are different from the default, rerun override_options,
3618 and then save the options away. The string options are are attribute options,
3619 and will be undone when we copy the save structure. */
3620 if (ix86_isa_flags != def->ix86_isa_flags
3621 || target_flags != def->target_flags
3622 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3623 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3624 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3626 /* If we are using the default tune= or arch=, undo the string assigned,
3627 and use the default. */
3628 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3629 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3630 else if (!orig_arch_specified)
3631 ix86_arch_string = NULL;
3633 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3634 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3635 else if (orig_tune_defaulted)
3636 ix86_tune_string = NULL;
3638 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3639 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3640 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3641 else if (!TARGET_64BIT && TARGET_SSE)
3642 ix86_fpmath_string = "sse,387";
3644 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3645 override_options (false);
3647 /* Add any builtin functions with the new isa if any. */
3648 ix86_add_new_builtins (ix86_isa_flags);
3650 /* Save the current options unless we are validating options for
3652 t = build_target_option_node ();
/* Restore the command-line strings we displaced above.  */
3654 ix86_arch_string = orig_arch_string;
3655 ix86_tune_string = orig_tune_string;
3656 ix86_fpmath_string = orig_fpmath_string;
3658 /* Free up memory allocated to hold the strings */
3659 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3660 if (option_strings[i])
3661 free (option_strings[i]);
3667 /* Hook to validate attribute((target("string"))). */
3670 ix86_valid_target_attribute_p (tree fndecl,
3671 tree ARG_UNUSED (name),
3673 int ARG_UNUSED (flags))
/* Snapshot of the current global target options, restored on exit so
   validating the attribute has no lasting effect.  */
3675 struct cl_target_option cur_target;
3677 tree old_optimize = build_optimization_node ();
3678 tree new_target, new_optimize;
3679 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3681 /* If the function changed the optimization levels as well as setting target
3682 options, start with the optimizations specified. */
3683 if (func_optimize && func_optimize != old_optimize)
3684 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3686 /* The target attributes may also change some optimization flags, so update
3687 the optimization options if necessary. */
3688 cl_target_option_save (&cur_target);
3689 new_target = ix86_valid_target_attribute_tree (args);
3690 new_optimize = build_optimization_node ();
/* Record the computed option nodes on the function declaration.  */
3697 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3699 if (old_optimize != new_optimize)
3700 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Put the global option state back the way we found it.  */
3703 cl_target_option_restore (&cur_target);
3705 if (old_optimize != new_optimize)
3706 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3712 /* Hook to determine if one function can safely inline another. */
3715 ix86_can_inline_p (tree caller, tree callee)
/* Fetch any attribute((target(...))) option nodes on each function.  */
3718 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3719 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3721 /* If callee has no option attributes, then it is ok to inline. */
3725 /* If caller has no option attributes, but callee does then it is not ok to
3727 else if (!caller_tree)
3732 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3733 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3735 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3736 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: every ISA bit the callee needs must be set in the
   caller.  */
3738 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3739 != callee_opts->ix86_isa_flags)
3742 /* See if we have the same non-isa options. */
3743 else if (caller_opts->target_flags != callee_opts->target_flags)
3746 /* See if arch, tune, etc. are the same. */
3747 else if (caller_opts->arch != callee_opts->arch)
3750 else if (caller_opts->tune != callee_opts->tune)
3753 else if (caller_opts->fpmath != callee_opts->fpmath)
3756 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3767 /* Remember the last target of ix86_set_current_function. */
3768 static GTY(()) tree ix86_previous_fndecl;
3770 /* Establish appropriate back-end context for processing the function
3771 FNDECL. The argument might be NULL to indicate processing at top
3772 level, outside of any function scope. */
3774 ix86_set_current_function (tree fndecl)
3776 /* Only change the context if the function changes. This hook is called
3777 several times in the course of compiling a function, and we don't want to
3778 slow things down too much or call target_reinit when it isn't safe. */
3779 if (fndecl && fndecl != ix86_previous_fndecl)
3781 tree old_tree = (ix86_previous_fndecl
3782 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3785 tree new_tree = (fndecl
3786 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
/* Cache the decl so repeated calls for the same function are cheap.  */
3789 ix86_previous_fndecl = fndecl;
3790 if (old_tree == new_tree)
/* Switch to the new function's saved target options...  */
3795 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* ...or back to the prevailing (current) options when the new function
   has no target attribute of its own.  */
3801 struct cl_target_option *def
3802 = TREE_TARGET_OPTION (target_option_current_node);
3804 cl_target_option_restore (def);
3811 /* Return true if this goes in large data/bss. */
3814 ix86_in_large_data_p (tree exp)
/* Only the medium code models split data into near/far sections.  */
3816 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3819 /* Functions are never large data. */
3820 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section placement in .ldata/.lbss decides directly.  */
3823 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3825 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3826 if (strcmp (section, ".ldata") == 0
3827 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by object size against -mlarge-data-threshold.  */
3833 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3835 /* If this is an incomplete type with size 0, then we can't put it
3836 in data because it might be too big when completed. */
3837 if (!size || size > ix86_section_threshold)
3844 /* Switch to the appropriate section for output of DECL.
3845 DECL is either a `VAR_DECL' node or a constant of some sort.
3846 RELOC indicates whether forming the initial value of DECL requires
3847 link-time relocations. */
3849 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3853 x86_64_elf_select_section (tree decl, int reloc,
3854 unsigned HOST_WIDE_INT align)
/* For medium-model large objects, map each data category onto the
   corresponding ".l"-prefixed far-data section.  */
3856 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3857 && ix86_in_large_data_p (decl))
3859 const char *sname = NULL;
3860 unsigned int flags = SECTION_WRITE;
3861 switch (categorize_decl_for_section (decl, reloc))
3866 case SECCAT_DATA_REL:
3867 sname = ".ldata.rel";
3869 case SECCAT_DATA_REL_LOCAL:
3870 sname = ".ldata.rel.local";
3872 case SECCAT_DATA_REL_RO:
3873 sname = ".ldata.rel.ro";
3875 case SECCAT_DATA_REL_RO_LOCAL:
3876 sname = ".ldata.rel.ro.local";
3880 flags |= SECTION_BSS;
3883 case SECCAT_RODATA_MERGE_STR:
3884 case SECCAT_RODATA_MERGE_STR_INIT:
3885 case SECCAT_RODATA_MERGE_CONST:
3889 case SECCAT_SRODATA:
3896 /* We don't split these for medium model. Place them into
3897 default sections and hope for best. */
3899 case SECCAT_EMUTLS_VAR:
3900 case SECCAT_EMUTLS_TMPL:
3905 /* We might get called with string constants, but get_named_section
3906 doesn't like them as they are not DECLs. Also, we need to set
3907 flags in that case. */
3909 return get_section (sname, flags, NULL);
3910 return get_named_section (decl, sname, reloc);
/* Not large data: fall back to the generic ELF section selection.  */
3913 return default_elf_select_section (decl, reloc, align);
3916 /* Build up a unique section name, expressed as a
3917 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3918 RELOC indicates whether the initial value of EXP requires
3919 link-time relocations. */
3921 static void ATTRIBUTE_UNUSED
3922 x86_64_elf_unique_section (tree decl, int reloc)
/* For medium-model large objects, build a per-decl section name with
   an ".l"-prefixed far-data prefix; otherwise defer to the default.  */
3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925 && ix86_in_large_data_p (decl))
3927 const char *prefix = NULL;
3928 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3929 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3931 switch (categorize_decl_for_section (decl, reloc))
3934 case SECCAT_DATA_REL:
3935 case SECCAT_DATA_REL_LOCAL:
3936 case SECCAT_DATA_REL_RO:
3937 case SECCAT_DATA_REL_RO_LOCAL:
3938 prefix = one_only ? ".ld" : ".ldata";
3941 prefix = one_only ? ".lb" : ".lbss";
3944 case SECCAT_RODATA_MERGE_STR:
3945 case SECCAT_RODATA_MERGE_STR_INIT:
3946 case SECCAT_RODATA_MERGE_CONST:
3947 prefix = one_only ? ".lr" : ".lrodata";
3949 case SECCAT_SRODATA:
3956 /* We don't split these for medium model. Place them into
3957 default sections and hope for best. */
3959 case SECCAT_EMUTLS_VAR:
3960 prefix = targetm.emutls.var_section;
3962 case SECCAT_EMUTLS_TMPL:
3963 prefix = targetm.emutls.tmpl_section;
3968 const char *name, *linkonce;
3971 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3972 name = targetm.strip_name_encoding (name);
3974 /* If we're using one_only, then there needs to be a .gnu.linkonce
3975 prefix to the section name. */
3976 linkonce = one_only ? ".gnu.linkonce" : "";
3978 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3980 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3984 default_unique_section (decl, reloc);
3987 #ifdef COMMON_ASM_OP
3988 /* This says how to output assembler code to declare an
3989 uninitialized external linkage data object.
3991 For medium model x86-64 we need to use .largecomm opcode for
3994 x86_elf_aligned_common (FILE *file,
3995 const char *name, unsigned HOST_WIDE_INT size,
/* Large common objects (over -mlarge-data-threshold) get .largecomm so
   the linker places them in far data; others use the normal .comm op.  */
3998 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3999 && size > (unsigned int)ix86_section_threshold)
4000 fprintf (file, ".largecomm\t")
4002 fprintf (file, "%s", COMMON_ASM_OP);
4003 assemble_name (file, name);
/* Alignment is emitted in bytes, not bits.  */
4004 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4005 size, align / BITS_PER_UNIT);
4009 /* Utility function for targets to use in implementing
4010 ASM_OUTPUT_ALIGNED_BSS. */
4013 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4014 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects go to .lbss in the medium code models; everything
   else uses the ordinary bss section.  */
4017 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4018 && size > (unsigned int)ix86_section_threshold)
4019 switch_to_section (get_named_section (decl, ".lbss", 0));
4021 switch_to_section (bss_section);
4022 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4023 #ifdef ASM_DECLARE_OBJECT_NAME
4024 last_assemble_variable_decl = decl;
4025 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4027 /* Standard thing is just output label for the object. */
4028 ASM_OUTPUT_LABEL (file, name);
4029 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the object's storage; at least one byte even for size 0.  */
4030 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific defaults for optimization-related flags at the
   given -O LEVEL, before command-line options are processed.  */
4034 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4036 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4037 make the problem with not enough registers even worse. */
4038 #ifdef INSN_SCHEDULING
4040 flag_schedule_insns = 0;
4044 /* The Darwin libraries never set errno, so we might as well
4045 avoid calling them when that's the only reason we would. */
4046 flag_errno_math = 0;
4048 /* The default values of these switches depend on the TARGET_64BIT
4049 that is not known at this moment. Mark these values with 2 and
4050 let user the to override these. In case there is no command line option
4051 specifying them, we will set the defaults in override_options. */
4053 flag_omit_frame_pointer = 2;
4054 flag_pcc_struct_return = 2;
4055 flag_asynchronous_unwind_tables = 2;
4056 flag_vect_cost_model = 1;
/* Let subtargets (e.g. specific OS configurations) adjust further.  */
4057 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4058 SUBTARGET_OPTIMIZATION_OPTIONS;
4062 /* Decide whether we can make a sibling call to a function. DECL is the
4063 declaration of the function being targeted by the call and EXP is the
4064 CALL_EXPR representing the call. */
/* NOTE(review): elided listing — the return-type line, local declarations,
   braces and several `return false;` lines are missing from this view; only
   the visible conditions are documented below.  */
4067 ix86_function_ok_for_sibcall (tree decl, tree exp)
4072 /* If we are generating position-independent code, we cannot sibcall
4073 optimize any indirect call, or a direct call to a global function,
4074 as the PLT requires %ebx be live. */
4075 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Strip the pointer type off CALL_EXPR_FN to reach the function type.  */
4082 func = TREE_TYPE (CALL_EXPR_FN (exp));
4083 if (POINTER_TYPE_P (func))
4084 func = TREE_TYPE (func);
4087 /* Check that the return value locations are the same. Like
4088 if we are returning floats on the 80387 register stack, we cannot
4089 make a sibcall from a function that doesn't return a float to a
4090 function that does or, conversely, from a function that does return
4091 a float to a function that doesn't; the necessary stack adjustment
4092 would not be executed. This is also the place we notice
4093 differences in the return value ABI. Note that it is ok for one
4094 of the functions to have void return type as long as the return
4095 value of the other is passed in a register. */
4096 a = ix86_function_value (TREE_TYPE (exp), func, false);
4097 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack-register returns must match exactly; otherwise a void-returning
   caller may still sibcall a register-returning callee.  */
4099 if (STACK_REG_P (a) || STACK_REG_P (b))
4101 if (!rtx_equal_p (a, b))
4104 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4106 else if (!rtx_equal_p (a, b))
4109 /* If this call is indirect, we'll need to be able to use a call-clobbered
4110 register for the address of the target function. Make sure that all
4111 such registers are not used for passing parameters. */
4112 if (!decl && !TARGET_64BIT)
4116 /* We're looking at the CALL_EXPR, we need the type of the function. */
4117 type = CALL_EXPR_FN (exp); /* pointer expression */
4118 type = TREE_TYPE (type); /* pointer type */
4119 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3 all of eax/edx/ecx may carry arguments, leaving no
   call-clobbered register free to hold the target address.  */
4121 if (ix86_function_regparm (type, NULL) >= 3)
4123 /* ??? Need to count the actual number of registers to be used,
4124 not the possible number of registers. Fix later. */
4129 /* Dllimport'd functions are also called indirectly. */
4130 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4132 && decl && DECL_DLLIMPORT_P (decl)
4133 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4136 /* If we need to align the outgoing stack, then sibcalling would
4137 unalign the stack, which may break the called function. */
4138 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4141 /* Otherwise okay. That also includes certain types of indirect calls. */
4145 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4146 calling convention attributes;
4147 arguments as in struct attribute_spec.handler. */
/* NOTE(review): elided listing — the static return-type line, several
   parameter lines, braces and `return NULL_TREE;` statements are not
   visible here.  */
4150 ix86_handle_cconv_attribute (tree *node, tree name,
4152 int flags ATTRIBUTE_UNUSED,
/* Calling-convention attributes only make sense on function-ish nodes.  */
4155 if (TREE_CODE (*node) != FUNCTION_TYPE
4156 && TREE_CODE (*node) != METHOD_TYPE
4157 && TREE_CODE (*node) != FIELD_DECL
4158 && TREE_CODE (*node) != TYPE_DECL)
4160 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4161 IDENTIFIER_POINTER (name));
4162 *no_add_attrs = true;
4166 /* Can combine regparm with all attributes but fastcall. */
4167 if (is_attribute_p ("regparm", name))
4171 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4173 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: integer constant, at most REGPARM_MAX.  */
4176 cst = TREE_VALUE (args);
4177 if (TREE_CODE (cst) != INTEGER_CST)
4179 warning (OPT_Wattributes,
4180 "%qs attribute requires an integer constant argument",
4181 IDENTIFIER_POINTER (name));
4182 *no_add_attrs = true;
4184 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4186 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4187 IDENTIFIER_POINTER (name), REGPARM_MAX);
4188 *no_add_attrs = true;
4196 /* Do not warn when emulating the MS ABI. */
4197 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4198 warning (OPT_Wattributes, "%qs attribute ignored",
4199 IDENTIFIER_POINTER (name));
4200 *no_add_attrs = true;
4204 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4205 if (is_attribute_p ("fastcall", name))
4207 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4209 error ("fastcall and cdecl attributes are not compatible");
4211 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4213 error ("fastcall and stdcall attributes are not compatible");
4215 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4217 error ("fastcall and regparm attributes are not compatible");
4221 /* Can combine stdcall with fastcall (redundant), regparm and
4223 else if (is_attribute_p ("stdcall", name))
4225 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4227 error ("stdcall and cdecl attributes are not compatible");
4229 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4231 error ("stdcall and fastcall attributes are not compatible");
4235 /* Can combine cdecl with regparm and sseregparm. */
4236 else if (is_attribute_p ("cdecl", name))
4238 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4240 error ("stdcall and cdecl attributes are not compatible");
4242 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4244 error ("fastcall and cdecl attributes are not compatible");
4248 /* Can combine sseregparm with all attributes. */
4253 /* Return 0 if the attributes for two types are incompatible, 1 if they
4254 are compatible, and 2 if they are nearly compatible (which causes a
4255 warning to be generated). */
/* NOTE(review): elided listing — the static `int` return-type line, the
   `return` statements inside each mismatch branch, and the final `return 1;`
   are not visible here.  */
4258 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4260 /* Check for mismatch of non-default calling convention. */
4261 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes.  */
4263 if (TREE_CODE (type1) != FUNCTION_TYPE
4264 && TREE_CODE (type1) != METHOD_TYPE)
4267 /* Check for mismatched fastcall/regparm types. */
4268 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4269 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4270 || (ix86_function_regparm (type1, NULL)
4271 != ix86_function_regparm (type2, NULL)))
4274 /* Check for mismatched sseregparm types. */
4275 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4276 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4279 /* Check for mismatched return types (cdecl vs stdcall). */
4280 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4281 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4287 /* Return the regparm value for a function with the indicated TYPE and DECL.
4288 DECL may be NULL when calling function indirectly
4289 or considering a libcall. */
/* NOTE(review): elided listing — return type, local declarations, braces and
   the final `return regparm;` are not visible in this excerpt.  */
4292 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued keeps the nested-function diagnostic to one emission.  */
4297 static bool error_issued;
/* 64-bit: register count is fixed by the ABI, not by attributes.  */
4300 return (ix86_function_type_abi (type) == SYSV_ABI
4301 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4303 regparm = ix86_regparm;
4304 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4308 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4310 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4312 /* We can't use regparm(3) for nested functions because
4313 these pass static chain pointer in %ecx register. */
4314 if (!error_issued && regparm == 3
4315 && decl_function_context (decl)
4316 && !DECL_NO_STATIC_CHAIN (decl))
4318 error ("nested functions are limited to 2 register parameters");
4319 error_issued = true;
4327 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4330 /* Use register calling convention for local functions when possible. */
4332 && TREE_CODE (decl) == FUNCTION_DECL
4336 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4337 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4340 int local_regparm, globals = 0, regno;
4343 /* Make sure no regparm register is taken by a
4344 fixed register variable. */
4345 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4346 if (fixed_regs[local_regparm])
4349 /* We can't use regparm(3) for nested functions as these use
4350 static chain pointer in third argument. */
4351 if (local_regparm == 3
4352 && decl_function_context (decl)
4353 && !DECL_NO_STATIC_CHAIN (decl))
4356 /* If the function realigns its stackpointer, the prologue will
4357 clobber %ecx. If we've already generated code for the callee,
4358 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4359 scanning the attributes for the self-realigning property. */
4360 f = DECL_STRUCT_FUNCTION (decl);
4361 /* Since current internal arg pointer won't conflict with
4362 parameter passing regs, so no need to change stack
4363 realignment and adjust regparm number.
4365 Each fixed register usage increases register pressure,
4366 so less registers should be used for argument passing.
4367 This functionality can be overriden by an explicit
4369 for (regno = 0; regno <= DI_REG; regno++)
4370 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed registers.  */
4374 = globals < local_regparm ? local_regparm - globals : 0;
4376 if (local_regparm > regparm)
4377 regparm = local_regparm;
4384 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4385 DFmode (2) arguments in SSE registers for a function with the
4386 indicated TYPE and DECL. DECL may be NULL when calling function
4387 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): elided listing — return type, braces and several `return`
   statements (including the SSE-availability check guarding the error
   branches) are not visible here.  WARN presumably gates the error calls
   below — confirm against the full source.  */
4390 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4392 gcc_assert (!TARGET_64BIT);
4394 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4395 by the sseregparm attribute. */
4396 if (TARGET_SSEREGPARM
4397 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4404 error ("Calling %qD with attribute sseregparm without "
4405 "SSE/SSE2 enabled", decl);
4407 error ("Calling %qT with attribute sseregparm without "
4408 "SSE/SSE2 enabled", type);
4416 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4417 (and DFmode for SSE2) arguments in SSE registers. */
4418 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4420 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4421 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4423 return TARGET_SSE2 ? 2 : 1;
4429 /* Return true if EAX is live at the start of the function. Used by
4430 ix86_expand_prologue to determine if we need special help before
4431 calling allocate_stack_worker. */
4434 ix86_eax_live_at_start_p (void)
4436 /* Cheat. Don't bother working forward from ix86_function_regparm
4437 to the function type to whether an actual argument is located in
4438 eax. Instead just look at cfg info, which is still close enough
4439 to correct at this point. This gives false positives for broken
4440 functions that might use uninitialized data that happens to be
4441 allocated in eax, but who cares? */
/* Register 0 is %eax on this target; query dataflow liveness at entry.  */
4442 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4445 /* Value is the number of bytes of arguments automatically
4446 popped when returning from a subroutine call.
4447 FUNDECL is the declaration node of the function (as a tree),
4448 FUNTYPE is the data type of the function (as a tree),
4449 or for a library call it is an identifier node for the subroutine name.
4450 SIZE is the number of bytes of arguments passed on the stack.
4452 On the 80386, the RTD insn may be used to pop them if the number
4453 of args is fixed, but if the number is variable then the caller
4454 must pop them all. RTD can't be used for library calls now
4455 because the library is compiled with the Unix compiler.
4456 Use of RTD is a selectable option, since it is incompatible with
4457 standard Unix calling sequences. If the option is not selected,
4458 the caller must always pop the args.
4460 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided listing — return type, braces, `return size;` paths
   and the final `return 0;` are not visible here.  */
4463 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4467 /* None of the 64-bit ABIs pop arguments. */
/* rtd: whether -mrtd applies; never for libcalls (identifier node).  */
4471 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4473 /* Cdecl functions override -mrtd, and never pop the stack. */
4474 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4476 /* Stdcall and fastcall functions will pop the stack if not
4478 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4479 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4482 if (rtd && ! stdarg_p (funtype))
4486 /* Lose any fake structure return argument if it is passed on the stack. */
4487 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4488 && !KEEP_AGGREGATE_RETURN_POINTER)
4490 int nregs = ix86_function_regparm (funtype, fundecl);
4492 return GET_MODE_SIZE (Pmode);
4498 /* Argument support functions. */
4500 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided listing — return type, local `i`, braces and the
   TARGET_64BIT / ABI dispatch lines separating the cases below are not
   visible in this excerpt.  */
4502 ix86_function_arg_regno_p (int regno)
4505 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE when enabled.  */
4510 return (regno < REGPARM_MAX
4511 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4513 return (regno < REGPARM_MAX
4514 || (TARGET_MMX && MMX_REGNO_P (regno)
4515 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4516 || (TARGET_SSE && SSE_REGNO_P (regno)
4517 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4522 if (SSE_REGNO_P (regno) && TARGET_SSE)
4527 if (TARGET_SSE && SSE_REGNO_P (regno)
4528 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4532 /* TODO: The function should depend on current function ABI but
4533 builtins.c would need updating then. Therefore we use the
4536 /* RAX is used as hidden argument to va_arg functions. */
4537 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: scan the integer parameter register table for this ABI.  */
4540 if (ix86_abi == MS_ABI)
4541 parm_regs = x86_64_ms_abi_int_parameter_registers;
4543 parm_regs = x86_64_int_parameter_registers;
4544 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4545 : X86_64_REGPARM_MAX); i++)
4546 if (regno == parm_regs[i])
4551 /* Return if we do not know how to pass TYPE solely in registers. */
4554 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic "variable size or padded" test first.  */
4556 if (must_pass_in_stack_var_size_or_pad (mode, type))
4559 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4560 The layout_type routine is crafty and tries to trick us into passing
4561 currently unsupported vector types on the stack by using TImode. */
4562 return (!TARGET_64BIT && mode == TImode
4563 && type && TREE_CODE (type) != VECTOR_TYPE);
4566 /* It returns the size, in bytes, of the area reserved for arguments passed
4567 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): elided listing — return type and the `return 32;` /
   `return 0;` lines for the MS-ABI shadow space are not visible here.  */
4570 ix86_reg_parm_stack_space (const_tree fndecl)
4572 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a decl or a type; pick the matching ABI query.  */
4573 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4574 call_abi = ix86_function_abi (fndecl);
4576 call_abi = ix86_function_type_abi (fndecl);
4577 if (call_abi == MS_ABI)
4582 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* ms_abi/sysv_abi attributes override the default ABI on 64-bit only.  */
4585 ix86_function_type_abi (const_tree fntype)
4587 if (TARGET_64BIT && fntype != NULL)
4589 enum calling_abi abi = ix86_abi;
4590 if (abi == SYSV_ABI)
4592 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4595 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function decl FNDECL via its type.  */
4602 static enum calling_abi
4603 ix86_function_abi (const_tree fndecl)
4607 return ix86_function_type_abi (TREE_TYPE (fndecl));
4610 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a 64-bit function body the cached per-function ABI is invalid.  */
4613 ix86_cfun_abi (void)
4615 if (! cfun || ! TARGET_64BIT)
4617 return cfun->machine->call_abi;
4621 extern void init_regs (void);
4623 /* Implementation of call abi switching target hook. Specific to FNDECL
4624 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4625 for more details. */
/* Record FNDECL's calling ABI in cfun; NULL means "use the default ABI".  */
4627 ix86_call_abi_override (const_tree fndecl)
4629 if (fndecl == NULL_TREE)
4630 cfun->machine->call_abi = ix86_abi;
4632 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4635 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4636 re-initialization of init_regs each time we switch function context since
4637 this is needed only during RTL expansion. */
4639 ix86_maybe_switch_abi (void)
/* %esi is call-used under SysV but call-saved under MS; a mismatch here
   means the register tables were set up for the other ABI.  NOTE(review):
   the guarded call (presumably init_regs ()) is elided from this view.  */
4642 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4646 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4647 for a call to a function whose data type is FNTYPE.
4648 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing — return type, the fndecl parameter line,
   braces and the TARGET_64BIT / !TARGET_64BIT branch structure are not
   visible in this excerpt.  */
4651 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4652 tree fntype, /* tree ptr for function decl */
4653 rtx libname, /* SYMBOL_REF of library name or 0 */
4656 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4657 memset (cum, 0, sizeof (*cum));
4660 cum->call_abi = ix86_function_abi (fndecl);
4662 cum->call_abi = ix86_function_type_abi (fntype);
4663 /* Set up the number of registers to use for passing arguments. */
4665 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4666 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4667 cum->nregs = ix86_regparm;
/* Cross-ABI calls use the callee ABI's register counts, not the default.  */
4670 if (cum->call_abi != ix86_abi)
4671 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4676 cum->sse_nregs = SSE_REGPARM_MAX;
4679 if (cum->call_abi != ix86_abi)
4680 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4681 : X64_SSE_REGPARM_MAX;
4685 cum->mmx_nregs = MMX_REGPARM_MAX;
4686 cum->warn_avx = true;
4687 cum->warn_sse = true;
4688 cum->warn_mmx = true;
4690 /* Because type might mismatch in between caller and callee, we need to
4691 use actual type of function for local calls.
4692 FIXME: cgraph_analyze can be told to actually record if function uses
4693 va_start so for local functions maybe_vaarg can be made aggressive
4695 FIXME: once type system is fixed, we won't need this code anymore. */
4697 fntype = TREE_TYPE (fndecl);
4698 cum->maybe_vaarg = (fntype
4699 ? (!prototype_p (fntype) || stdarg_p (fntype))
4704 /* If there are variable arguments, then we won't pass anything
4705 in registers in 32-bit mode. */
4706 if (stdarg_p (fntype))
4717 /* Use ecx and edx registers if function has fastcall attribute,
4718 else look for regparm information. */
4721 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4727 cum->nregs = ix86_function_regparm (fntype, fndecl);
4730 /* Set up the number of SSE registers used for passing SFmode
4731 and DFmode arguments. Warn for mismatching ABI. */
4732 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4736 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4737 But in the case of vector types, it is some vector mode.
4739 When we have only some of our vector isa extensions enabled, then there
4740 are some modes for which vector_mode_supported_p is false. For these
4741 modes, the generic vector support in gcc will choose some non-vector mode
4742 in order to implement the type. By computing the natural mode, we'll
4743 select the proper ABI location for the operand and not depend on whatever
4744 the middle-end decides to do with these vector types.
4746 The middle-end can't deal with the vector types > 16 bytes. In this
4747 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): elided listing — braces, the `break`/`return mode` inside the
   mode-search loop and the fallback `return mode;` are not visible here.  */
4750 static enum machine_mode
4751 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4753 enum machine_mode mode = TYPE_MODE (type);
4755 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4757 HOST_WIDE_INT size = int_size_in_bytes (type);
4758 if ((size == 8 || size == 16 || size == 32)
4759 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4760 && TYPE_VECTOR_SUBPARTS (type) > 1)
4762 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the wider-mode scan from the first vector mode of the right
   class (float vs. integer element type).  */
4764 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4765 mode = MIN_MODE_VECTOR_FLOAT;
4767 mode = MIN_MODE_VECTOR_INT;
4769 /* Get the mode which has this inner mode and number of units. */
4770 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4771 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4772 && GET_MODE_INNER (mode) == innermode)
4774 if (size == 32 && !TARGET_AVX)
/* warnedavx makes the psABI warning fire at most once.  */
4776 static bool warnedavx;
4783 warning (0, "AVX vector argument without AVX "
4784 "enabled changes the ABI");
4786 return TYPE_MODE (type);
4799 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4800 this may not agree with the mode that the type system has chosen for the
4801 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4802 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4805 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4810 if (orig_mode != BLKmode)
4811 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
4814 tmp = gen_rtx_REG (mode, regno);
4815 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4816 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4822 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4823 of this code is to classify each 8bytes of incoming argument by the register
4824 class and assign registers accordingly. */
4826 /* Return the union class of CLASS1 and CLASS2.
4827 See the x86-64 PS ABI for details. */
4829 static enum x86_64_reg_class
4830 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4832 /* Rule #1: If both classes are equal, this is the resulting class. */
4833 if (class1 == class2)
4836 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4838 if (class1 == X86_64_NO_CLASS)
4840 if (class2 == X86_64_NO_CLASS)
4843 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4844 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4845 return X86_64_MEMORY_CLASS;
4847 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI — both halves fit in 32 bits.  */
4848 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4849 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4850 return X86_64_INTEGERSI_CLASS;
4851 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4852 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4853 return X86_64_INTEGER_CLASS;
4855 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4857 if (class1 == X86_64_X87_CLASS
4858 || class1 == X86_64_X87UP_CLASS
4859 || class1 == X86_64_COMPLEX_X87_CLASS
4860 || class2 == X86_64_X87_CLASS
4861 || class2 == X86_64_X87UP_CLASS
4862 || class2 == X86_64_COMPLEX_X87_CLASS)
4863 return X86_64_MEMORY_CLASS;
4865 /* Rule #6: Otherwise class SSE is used. */
4866 return X86_64_SSE_CLASS;
4869 /* Classify the argument of type TYPE and mode MODE.
4870 CLASSES will be filled by the register class used to pass each word
4871 of the operand. The number of words is returned. In case the parameter
4872 should be passed in memory, 0 is returned. As a special case for zero
4873 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4875 BIT_OFFSET is used internally for handling records and specifies offset
4876 of the offset in bits modulo 256 to avoid overflow cases.
4878 See the x86-64 PS ABI for details.
/* NOTE(review): elided listing — the static return type, many braces,
   `return 0;` failure paths, the `switch (mode)` dispatch lines for the
   scalar-mode classification, and several case labels are not visible in
   this excerpt.  Code lines are kept byte-identical.  */
4882 classify_argument (enum machine_mode mode, const_tree type,
4883 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4885 HOST_WIDE_INT bytes =
4886 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4887 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4889 /* Variable sized entities are always passed/returned in memory. */
4893 if (mode != VOIDmode
4894 && targetm.calls.must_pass_in_stack (mode, type))
4897 if (type && AGGREGATE_TYPE_P (type))
4901 enum x86_64_reg_class subclasses[MAX_CLASSES];
4903 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4907 for (i = 0; i < words; i++)
4908 classes[i] = X86_64_NO_CLASS;
4910 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4911 signalize memory class, so handle it as special case. */
4914 classes[0] = X86_64_NO_CLASS;
4918 /* Classify each field of record and merge classes. */
4919 switch (TREE_CODE (type))
4922 /* And now merge the fields of structure. */
4923 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4925 if (TREE_CODE (field) == FIELD_DECL)
4929 if (TREE_TYPE (field) == error_mark_node)
4932 /* Bitfields are always classified as integer. Handle them
4933 early, since later code would consider them to be
4934 misaligned integers. */
4935 if (DECL_BIT_FIELD (field))
/* Merge INTEGER into every 8-byte word the bitfield overlaps.  */
4937 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4938 i < ((int_bit_position (field) + (bit_offset % 64))
4939 + tree_low_cst (DECL_SIZE (field), 0)
4942 merge_classes (X86_64_INTEGER_CLASS,
4949 type = TREE_TYPE (field);
4951 /* Flexible array member is ignored. */
4952 if (TYPE_MODE (type) == BLKmode
4953 && TREE_CODE (type) == ARRAY_TYPE
4954 && TYPE_SIZE (type) == NULL_TREE
4955 && TYPE_DOMAIN (type) != NULL_TREE
4956 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
4961 if (!warned && warn_psabi)
4964 inform (input_location,
4965 "The ABI of passing struct with"
4966 " a flexible array member has"
4967 " changed in GCC 4.4");
/* Recursively classify the field, then merge its subclasses into the
   words it occupies within the enclosing record.  */
4971 num = classify_argument (TYPE_MODE (type), type,
4973 (int_bit_position (field)
4974 + bit_offset) % 256);
4977 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4978 for (i = 0; i < num && (i + pos) < words; i++)
4980 merge_classes (subclasses[i], classes[i + pos]);
4987 /* Arrays are handled as small records. */
4990 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4991 TREE_TYPE (type), subclasses, bit_offset);
4995 /* The partial classes are now full classes. */
4996 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4997 subclasses[0] = X86_64_SSE_CLASS;
4998 if (subclasses[0] == X86_64_INTEGERSI_CLASS
4999 && !((bit_offset % 64) == 0 && bytes == 4))
5000 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the array.  */
5002 for (i = 0; i < words; i++)
5003 classes[i] = subclasses[i % num];
5008 case QUAL_UNION_TYPE:
5009 /* Unions are similar to RECORD_TYPE but offset is always 0.
5011 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5013 if (TREE_CODE (field) == FIELD_DECL)
5017 if (TREE_TYPE (field) == error_mark_node)
5020 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5021 TREE_TYPE (field), subclasses,
5025 for (i = 0; i < num; i++)
5026 classes[i] = merge_classes (subclasses[i], classes[i]);
5037 /* When size > 16 bytes, if the first one isn't
5038 X86_64_SSE_CLASS or any other ones aren't
5039 X86_64_SSEUP_CLASS, everything should be passed in
5041 if (classes[0] != X86_64_SSE_CLASS)
5044 for (i = 1; i < words; i++)
5045 if (classes[i] != X86_64_SSEUP_CLASS)
5049 /* Final merger cleanup. */
5050 for (i = 0; i < words; i++)
5052 /* If one class is MEMORY, everything should be passed in
5054 if (classes[i] == X86_64_MEMORY_CLASS)
5057 /* The X86_64_SSEUP_CLASS should be always preceded by
5058 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5059 if (classes[i] == X86_64_SSEUP_CLASS
5060 && classes[i - 1] != X86_64_SSE_CLASS
5061 && classes[i - 1] != X86_64_SSEUP_CLASS)
5063 /* The first one should never be X86_64_SSEUP_CLASS. */
5064 gcc_assert (i != 0);
5065 classes[i] = X86_64_SSE_CLASS;
5068 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5069 everything should be passed in memory. */
5070 if (classes[i] == X86_64_X87UP_CLASS
5071 && (classes[i - 1] != X86_64_X87_CLASS))
5075 /* The first one should never be X86_64_X87UP_CLASS. */
5076 gcc_assert (i != 0);
5077 if (!warned && warn_psabi)
5080 inform (input_location,
5081 "The ABI of passing union with long double"
5082 " has changed in GCC 4.4");
5090 /* Compute alignment needed. We align all types to natural boundaries with
5091 exception of XFmode that is aligned to 64bits. */
5092 if (mode != VOIDmode && mode != BLKmode)
5094 int mode_alignment = GET_MODE_BITSIZE (mode);
5097 mode_alignment = 128;
5098 else if (mode == XCmode)
5099 mode_alignment = 256;
5100 if (COMPLEX_MODE_P (mode))
5101 mode_alignment /= 2;
5102 /* Misaligned fields are always returned in memory. */
5103 if (bit_offset % mode_alignment)
5107 /* for V1xx modes, just use the base mode */
5108 if (VECTOR_MODE_P (mode) && mode != V1DImode
5109 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5110 mode = GET_MODE_INNER (mode);
5112 /* Classification of atomic types. */
/* NOTE(review): the case labels for the per-mode classifications below
   (integer, SF/DF/XF, complex, vector modes) are elided from this view;
   only the class assignments remain.  */
5117 classes[0] = X86_64_SSE_CLASS;
5120 classes[0] = X86_64_SSE_CLASS;
5121 classes[1] = X86_64_SSEUP_CLASS;
5131 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5135 classes[0] = X86_64_INTEGERSI_CLASS;
5138 else if (size <= 64)
5140 classes[0] = X86_64_INTEGER_CLASS;
5143 else if (size <= 64+32)
5145 classes[0] = X86_64_INTEGER_CLASS;
5146 classes[1] = X86_64_INTEGERSI_CLASS;
5149 else if (size <= 64+64)
5151 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5159 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5163 /* OImode shouldn't be used directly. */
5168 if (!(bit_offset % 64))
5169 classes[0] = X86_64_SSESF_CLASS;
5171 classes[0] = X86_64_SSE_CLASS;
5174 classes[0] = X86_64_SSEDF_CLASS;
5177 classes[0] = X86_64_X87_CLASS;
5178 classes[1] = X86_64_X87UP_CLASS;
5181 classes[0] = X86_64_SSE_CLASS;
5182 classes[1] = X86_64_SSEUP_CLASS;
5185 classes[0] = X86_64_SSE_CLASS;
5188 classes[0] = X86_64_SSEDF_CLASS;
5189 classes[1] = X86_64_SSEDF_CLASS;
5192 classes[0] = X86_64_COMPLEX_X87_CLASS;
5195 /* This mode is larger than 16 bytes. */
5203 classes[0] = X86_64_SSE_CLASS;
5204 classes[1] = X86_64_SSEUP_CLASS;
5205 classes[2] = X86_64_SSEUP_CLASS;
5206 classes[3] = X86_64_SSEUP_CLASS;
5214 classes[0] = X86_64_SSE_CLASS;
5215 classes[1] = X86_64_SSEUP_CLASS;
5222 classes[0] = X86_64_SSE_CLASS;
5228 gcc_assert (VECTOR_MODE_P (mode));
5233 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5235 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5236 classes[0] = X86_64_INTEGERSI_CLASS;
5238 classes[0] = X86_64_INTEGER_CLASS;
5239 classes[1] = X86_64_INTEGER_CLASS;
5240 return 1 + (bytes > 8);
5244 /* Examine the argument and return set number of register required in each
5245 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): elided listing — the static return type, the zeroing of
   *int_nregs/*sse_nregs, braces and the final `return 1;` are not visible
   here.  Counts how many GPRs and SSE regs the classification needs.  */
5247 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5248 int *int_nregs, int *sse_nregs)
5250 enum x86_64_reg_class regclass[MAX_CLASSES];
5251 int n = classify_argument (mode, type, regclass, 0);
5257 for (n--; n >= 0; n--)
5258 switch (regclass[n])
5260 case X86_64_INTEGER_CLASS:
5261 case X86_64_INTEGERSI_CLASS:
5264 case X86_64_SSE_CLASS:
5265 case X86_64_SSESF_CLASS:
5266 case X86_64_SSEDF_CLASS:
5269 case X86_64_NO_CLASS:
5270 case X86_64_SSEUP_CLASS:
/* x87 classes are only valid for return values, never for arguments.  */
5272 case X86_64_X87_CLASS:
5273 case X86_64_X87UP_CLASS:
5277 case X86_64_COMPLEX_X87_CLASS:
5278 return in_return ? 2 : 0;
5279 case X86_64_MEMORY_CLASS:
5285 /* Construct container for the argument used by GCC interface. See
5286 FUNCTION_ARG for the detailed description. */
/* NOTE(review): incomplete extract -- several interior lines (declarations
   of n/i/nexps/ret, some returns, gcc_unreachable arms) are elided; code
   below is kept byte-identical.  Builds the rtx (single REG or PARALLEL)
   describing how an argument/return value of MODE is split across integer
   and SSE registers under the x86-64 ABI.  */
5289 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5290 const_tree type, int in_return, int nintregs, int nsseregs,
5291 const int *intreg, int sse_regno)
/* Static flags so each class of ABI-misconfiguration error is emitted once
   per compilation, not once per argument.  */
5293 /* The following variables hold the static issued_error state. */
5294 static bool issued_sse_arg_error;
5295 static bool issued_sse_ret_error;
5296 static bool issued_x87_ret_error;
5298 enum machine_mode tmpmode;
5300 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5301 enum x86_64_reg_class regclass[MAX_CLASSES];
5305 int needed_sseregs, needed_intregs;
5306 rtx exp[MAX_CLASSES];
5309 n = classify_argument (mode, type, regclass, 0);
/* Bail out (pass in memory) when classification fails or the caller does
   not have enough free registers of either kind.  */
5312 if (!examine_argument (mode, type, in_return, &needed_intregs,
5315 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5318 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5319 some less clueful developer tries to use floating-point anyway. */
5320 if (needed_sseregs && !TARGET_SSE)
5324 if (!issued_sse_ret_error)
5326 error ("SSE register return with SSE disabled");
5327 issued_sse_ret_error = true;
5330 else if (!issued_sse_arg_error)
5332 error ("SSE register argument with SSE disabled");
5333 issued_sse_arg_error = true;
5338 /* Likewise, error if the ABI requires us to return values in the
5339 x87 registers and the user specified -mno-80387. */
5340 if (!TARGET_80387 && in_return)
5341 for (i = 0; i < n; i++)
5342 if (regclass[i] == X86_64_X87_CLASS
5343 || regclass[i] == X86_64_X87UP_CLASS
5344 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5346 if (!issued_x87_ret_error)
5348 error ("x87 register return with x87 disabled");
5349 issued_x87_ret_error = true;
5354 /* First construct simple cases. Avoid SCmode, since we want to use
5355 single register to pass this type. */
5356 if (n == 1 && mode != SCmode)
5357 switch (regclass[0])
5359 case X86_64_INTEGER_CLASS:
5360 case X86_64_INTEGERSI_CLASS:
5361 return gen_rtx_REG (mode, intreg[0]);
5362 case X86_64_SSE_CLASS:
5363 case X86_64_SSESF_CLASS:
5364 case X86_64_SSEDF_CLASS:
5365 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5366 case X86_64_X87_CLASS:
5367 case X86_64_COMPLEX_X87_CLASS:
5368 return gen_rtx_REG (mode, FIRST_STACK_REG);
5369 case X86_64_NO_CLASS:
5370 /* Zero sized array, struct or class. */
/* 16-byte SSE value (e.g. __m128): one xmm register.  */
5375 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5376 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5377 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* 32-byte SSE+3xSSEUP value (e.g. __m256): one ymm register.
   NOTE(review): the enclosing `if (n == 4 ...)` head is elided here.  */
5379 && regclass[0] == X86_64_SSE_CLASS
5380 && regclass[1] == X86_64_SSEUP_CLASS
5381 && regclass[2] == X86_64_SSEUP_CLASS
5382 && regclass[3] == X86_64_SSEUP_CLASS
5384 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5387 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5388 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Adjacent integer registers holding a 16-byte scalar pair.  */
5389 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5390 && regclass[1] == X86_64_INTEGER_CLASS
5391 && (mode == CDImode || mode == TImode || mode == TFmode)
5392 && intreg[0] + 1 == intreg[1])
5393 return gen_rtx_REG (mode, intreg[0]);
5395 /* Otherwise figure out the entries of the PARALLEL. */
5396 for (i = 0; i < n; i++)
5400 switch (regclass[i])
5402 case X86_64_NO_CLASS:
5404 case X86_64_INTEGER_CLASS:
5405 case X86_64_INTEGERSI_CLASS:
5406 /* Merge TImodes on aligned occasions here too. */
5407 if (i * 8 + 8 > bytes)
5408 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5409 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5413 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5414 if (tmpmode == BLKmode)
5416 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5417 gen_rtx_REG (tmpmode, *intreg),
5421 case X86_64_SSESF_CLASS:
5422 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5423 gen_rtx_REG (SFmode,
5424 SSE_REGNO (sse_regno)),
5428 case X86_64_SSEDF_CLASS:
5429 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5430 gen_rtx_REG (DFmode,
5431 SSE_REGNO (sse_regno)),
5435 case X86_64_SSE_CLASS:
/* An SSE class followed by SSEUP entries is one wide vector register;
   verify the SSEUP run is well-formed.  */
5443 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5453 && regclass[1] == X86_64_SSEUP_CLASS
5454 && regclass[2] == X86_64_SSEUP_CLASS
5455 && regclass[3] == X86_64_SSEUP_CLASS);
5462 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5463 gen_rtx_REG (tmpmode,
5464 SSE_REGNO (sse_regno)),
5473 /* Empty aligned struct, union or class. */
/* Wrap the collected EXPR_LIST entries into the PARALLEL result.  */
5477 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5478 for (i = 0; i < nexps; i++)
5479 XVECEXP (ret, 0, i) = exp [i];
5483 /* Update the data in CUM to advance over an argument of mode MODE
5484 and data type TYPE. (TYPE is null for libcalls where that information
5485 may not be available.) */
/* NOTE(review): incomplete extract -- the switch over MODE that selects
   among the word/SSE/MMX branches below is elided; code kept byte-identical.
   32-bit variant: consumes integer words, or one SSE / one MMX register,
   from the cumulative-args state.  */
5488 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5489 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: charge WORDS registers/words.  */
5505 cum->words += words;
5506 cum->nregs -= words;
5507 cum->regno += words;
5509 if (cum->nregs <= 0)
5517 /* OImode shouldn't be used directly. */
5521 if (cum->float_in_sse < 2)
5524 if (cum->float_in_sse < 1)
/* SSE path: aggregates are excluded -- only scalar/vector values occupy
   an xmm register here.  */
5541 if (!type || !AGGREGATE_TYPE_P (type))
5543 cum->sse_words += words;
5544 cum->sse_nregs -= 1;
5545 cum->sse_regno += 1;
5546 if (cum->sse_nregs <= 0)
/* MMX path: same aggregate exclusion for mm registers.  */
5559 if (!type || !AGGREGATE_TYPE_P (type))
5561 cum->mmx_words += words;
5562 cum->mmx_nregs -= 1;
5563 cum->mmx_regno += 1;
5564 if (cum->mmx_nregs <= 0)
/* Advance CUM past one x86-64 SysV argument: either charge the needed
   integer/SSE registers, or (if it does not fit in registers, or is an
   unnamed 256-bit vector) charge stack words instead.
   NOTE(review): incomplete extract -- braces/early-return lines elided.  */
5575 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5576 tree type, HOST_WIDE_INT words, int named)
5578 int int_nregs, sse_nregs;
5580 /* Unnamed 256bit vector mode parameters are passed on stack. */
5581 if (!named && VALID_AVX256_REG_MODE (mode))
5584 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5585 cum->words += words;
5586 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5588 cum->nregs -= int_nregs;
5589 cum->sse_nregs -= sse_nregs;
5590 cum->regno += int_nregs;
5591 cum->sse_regno += sse_nregs;
/* Fallback (registers exhausted): argument goes on the stack.  */
5594 cum->words += words;
/* Advance CUM past one Microsoft x64 ABI argument.  Anything not of size
   1/2/4/8 must already have been converted to pass-by-reference, hence the
   assert.  NOTE(review): incomplete extract -- register bookkeeping lines
   between the assert and the words update are elided.  */
5598 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5599 HOST_WIDE_INT words)
5601 /* Otherwise, this should be passed indirect. */
5602 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5604 cum->words += words;
/* Top-level argument-advance dispatcher: compute the argument's size in
   bytes and words, then delegate to the MS-x64, SysV-x64, or 32-bit
   worker according to target and call ABI.
   NOTE(review): incomplete extract -- some guard lines elided.  */
5613 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5614 tree type, int named)
5616 HOST_WIDE_INT bytes, words;
5618 if (mode == BLKmode)
5619 bytes = int_size_in_bytes (type);
5621 bytes = GET_MODE_SIZE (mode);
5622 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Use the type's natural (possibly vector) mode for classification.  */
5625 mode = type_natural_mode (type, NULL);
5627 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5628 function_arg_advance_ms_64 (cum, bytes, words);
5629 else if (TARGET_64BIT)
5630 function_arg_advance_64 (cum, mode, type, words, named);
5632 function_arg_advance_32 (cum, mode, type, bytes, words);
5635 /* Define where to put the arguments to a function.
5636 Value is zero to push the argument on the stack,
5637 or a hard register in which to store the argument.
5639 MODE is the argument's machine mode.
5640 TYPE is the data type of the argument (as a tree).
5641 This is null for libcalls where that information may
5643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5644 the preceding args and about the function being called.
5645 NAMED is nonzero if this argument is a named parameter
5646 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): incomplete extract -- the mode switch that routes to the
   integer/SSE/MMX branches and several returns are elided; code below is
   byte-identical to the original lines that remain visible.  */
5649 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5650 enum machine_mode orig_mode, tree type,
5651 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning latches, shared across all calls.  */
5653 static bool warnedsse, warnedmmx;
5655 /* Avoid the AL settings for the Unix64 ABI. */
5656 if (mode == VOIDmode)
/* Integer path: only if the whole argument fits in remaining registers.  */
5672 if (words <= cum->nregs)
5674 int regno = cum->regno;
5676 /* Fastcall allocates the first two DWORD (SImode) or
5677 smaller arguments to ECX and EDX if it isn't an
5683 || (type && AGGREGATE_TYPE_P (type)))
5686 /* ECX not EAX is the first allocated register. */
5687 if (regno == AX_REG)
5690 return gen_rtx_REG (mode, regno);
5695 if (cum->float_in_sse < 2)
5698 if (cum->float_in_sse < 1)
5702 /* In 32bit, we pass TImode in xmm registers. */
5709 if (!type || !AGGREGATE_TYPE_P (type))
5711 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5714 warning (0, "SSE vector argument without SSE enabled "
5718 return gen_reg_or_parallel (mode, orig_mode,
5719 cum->sse_regno + FIRST_SSE_REG)
5724 /* OImode shouldn't be used directly. */
5733 if (!type || !AGGREGATE_TYPE_P (type))
5736 return gen_reg_or_parallel (mode, orig_mode,
5737 cum->sse_regno + FIRST_SSE_REG);
/* MMX path, with a matching one-shot warning when MMX is disabled.  */
5746 if (!type || !AGGREGATE_TYPE_P (type))
5748 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5751 warning (0, "MMX vector argument without MMX enabled "
5755 return gen_reg_or_parallel (mode, orig_mode,
5756 cum->mmx_regno + FIRST_MMX_REG);
/* x86-64 SysV argument placement.  A VOIDmode "argument" is the hidden AL
   value that tells a varargs callee how many SSE registers were used.
   NOTE(review): incomplete extract -- conditional arms between the GEN_INT
   ladder and the construct_container call are elided.  */
5765 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5766 enum machine_mode orig_mode, tree type, int named)
5768 /* Handle a hidden AL argument containing number of registers
5769 for varargs x86-64 functions. */
5770 if (mode == VOIDmode)
5771 return GEN_INT (cum->maybe_vaarg
5772 ? (cum->sse_nregs < 0
5773 ? (cum->call_abi == ix86_abi
5775 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5776 : X64_SSE_REGPARM_MAX))
5791 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate actual register assignment to construct_container.  */
5797 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5799 &x86_64_int_parameter_registers [cum->regno],
/* Microsoft x64 ABI argument placement.  NOTE(review): incomplete extract --
   some declarations/braces elided; code kept byte-identical.  */
5804 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5805 enum machine_mode orig_mode, int named,
5806 HOST_WIDE_INT bytes)
5810 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5811 We use value of -2 to specify that current function call is MSABI. */
5812 if (mode == VOIDmode)
5813 return GEN_INT (-2);
5815 /* If we've run out of registers, it goes on the stack. */
5816 if (cum->nregs == 0)
/* Default: the slot's integer register (rcx/rdx/r8/r9).  */
5819 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5821 /* Only floating point modes are passed in anything but integer regs. */
5822 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5825 regno = cum->regno + FIRST_SSE_REG;
5830 /* Unnamed floating parameters are passed in both the
5831 SSE and integer registers. */
5832 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5833 t2 = gen_rtx_REG (mode, regno);
5834 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5835 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5836 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5839 /* Handle aggregated types passed in register. */
5840 if (orig_mode == BLKmode)
5842 if (bytes > 0 && bytes <= 8)
5843 mode = (bytes > 4 ? DImode : SImode);
5844 if (mode == BLKmode)
5848 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: size the argument, normalize vector
   types to their natural mode, then route to the MS-x64, SysV-x64 or
   32-bit worker.  NOTE(review): incomplete extract -- else lines elided.  */
5852 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5853 tree type, int named)
5855 enum machine_mode mode = omode;
5856 HOST_WIDE_INT bytes, words;
5858 if (mode == BLKmode)
5859 bytes = int_size_in_bytes (type);
5861 bytes = GET_MODE_SIZE (mode);
5862 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5864 /* To simplify the code below, represent vector types with a vector mode
5865 even if MMX/SSE are not active. */
5866 if (type && TREE_CODE (type) == VECTOR_TYPE)
5867 mode = type_natural_mode (type, cum);
5869 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5870 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5871 else if (TARGET_64BIT)
5872 return function_arg_64 (cum, mode, omode, type, named);
5874 return function_arg_32 (cum, mode, omode, type, bytes, words);
5877 /* A C expression that indicates when an argument must be passed by
5878 reference. If nonzero for an argument, a copy of that argument is
5879 made in memory and a pointer to the argument is passed instead of
5880 the argument itself. The pointer is passed in whatever way is
5881 appropriate for passing a pointer to that type. */
/* NOTE(review): incomplete extract -- the switch body around the [1248]
   size check and the final returns are elided.  Under the MS x64 ABI,
   arrays and anything not exactly 1/2/4/8 bytes go by reference; under
   SysV x64, variable-sized types (int_size_in_bytes == -1) do.  */
5884 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5885 enum machine_mode mode ATTRIBUTE_UNUSED,
5886 const_tree type, bool named ATTRIBUTE_UNUSED)
5888 /* See Windows x64 Software Convention. */
5889 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5891 int msize = (int) GET_MODE_SIZE (mode);
5894 /* Arrays are passed by reference. */
5895 if (TREE_CODE (type) == ARRAY_TYPE)
5898 if (AGGREGATE_TYPE_P (type))
5900 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5901 are passed by reference. */
5902 msize = int_size_in_bytes (type);
5906 /* __m128 is passed by reference. */
5908 case 1: case 2: case 4: case 8:
5914 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5920 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): incomplete extract -- returns/braces between the alignment
   tests and the aggregate walk are elided.  Recursively decides whether
   TYPE (or any field of it) carries a 128-bit alignment requirement.  */
5923 contains_aligned_value_p (tree type)
5925 enum machine_mode mode = TYPE_MODE (type);
5926 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5930 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5932 if (TYPE_ALIGN (type) < 128)
5935 if (AGGREGATE_TYPE_P (type))
5937 /* Walk the aggregates recursively. */
5938 switch (TREE_CODE (type))
5942 case QUAL_UNION_TYPE:
5946 /* Walk all the structure fields. */
5947 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5949 if (TREE_CODE (field) == FIELD_DECL
5950 && contains_aligned_value_p (TREE_TYPE (field)))
5957 /* Just for use if some languages passes arrays by value. */
5958 if (contains_aligned_value_p (TREE_TYPE (type)))
5969 /* Gives the alignment boundary, in bits, of an argument with the
5970 specified mode and type. */
/* NOTE(review): incomplete extract -- the if/else scaffolding between the
   type and mode branches is elided.  Computes the parameter-passing
   alignment, clamped between PARM_BOUNDARY and BIGGEST_ALIGNMENT.  */
5973 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5978 /* Since canonical type is used for call, we convert it to
5979 canonical type if needed. */
5980 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5981 type = TYPE_CANONICAL (type);
5982 align = TYPE_ALIGN (type);
5985 align = GET_MODE_ALIGNMENT (mode);
5986 if (align < PARM_BOUNDARY)
5987 align = PARM_BOUNDARY;
5988 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5989 natural boundaries. */
5990 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5992 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5993 make an exception for SSE modes since these require 128bit
5996 The handling here differs from field_alignment. ICC aligns MMX
5997 arguments to 4 byte boundaries, while structure fields are aligned
5998 to 8 byte boundaries. */
6001 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6002 align = PARM_BOUNDARY;
6006 if (!contains_aligned_value_p (type))
6007 align = PARM_BOUNDARY;
6010 if (align > BIGGEST_ALIGNMENT)
6011 align = BIGGEST_ALIGNMENT;
6015 /* Return true if N is a possible register number of function value. */
/* NOTE(review): incomplete extract -- the switch head and remaining case
   labels are elided; visible logic covers x87 st(0) and a Mach-O/64-bit
   branch only.  */
6018 ix86_function_value_regno_p (int regno)
6025 case FIRST_FLOAT_REG:
6026 /* TODO: The function should depend on current function ABI but
6027 builtins.c would need updating then. Therefore we use the
/* Win64 never returns values in x87 registers.  */
6029 if (TARGET_64BIT && ix86_abi == MS_ABI)
6031 return TARGET_FLOAT_RETURNS_IN_80387;
6037 if (TARGET_MACHO || TARGET_64BIT)
6045 /* Define how to find the value returned by a function.
6046 VALTYPE is the data type of the value (as a tree).
6047 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6048 otherwise, FUNC is 0. */
/* 32-bit return-register selection: mm0 / xmm0 / ymm0 / st(0) / eax
   depending on mode, with an SSE override for sseregparm functions.
   NOTE(review): incomplete extract -- the "regno = AX_REG" default line
   is elided; code kept byte-identical.  */
6051 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6052 const_tree fntype, const_tree fn)
6056 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6057 we normally prevent this case when mmx is not available. However
6058 some ABIs may require the result to be returned like DImode. */
6059 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6060 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6062 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6063 we prevent this case when sse is not available. However some ABIs
6064 may require the result to be returned like integer TImode. */
6065 else if (mode == TImode
6066 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6067 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6069 /* 32-byte vector modes in %ymm0. */
6070 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6071 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6073 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6074 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6075 regno = FIRST_FLOAT_REG;
6077 /* Most things go in %eax. */
6080 /* Override FP return register with %xmm0 for local functions when
6081 SSE math is enabled or for functions with sseregparm attribute. */
6082 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6084 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6085 if ((sse_level >= 1 && mode == SFmode)
6086 || (sse_level == 2 && mode == DFmode))
6087 regno = FIRST_SSE_REG;
6090 /* OImode shouldn't be used directly. */
6091 gcc_assert (mode != OImode);
6093 return gen_rtx_REG (orig_mode, regno);
/* x86-64 SysV return-value placement: libcalls (no type node) get a
   register directly; typed values go through construct_container.
   NOTE(review): incomplete extract -- the mode switch for the libcall
   path is elided.  */
6097 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6102 /* Handle libcalls, which don't provide a type node. */
6103 if (valtype == NULL)
6115 return gen_rtx_REG (mode, FIRST_SSE_REG);
6118 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6122 return gen_rtx_REG (mode, AX_REG);
6126 ret = construct_container (mode, orig_mode, valtype, 1,
6127 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6128 x86_64_int_return_registers, 0);
6130 /* For zero sized structures, construct_container returns NULL, but we
6131 need to keep rest of compiler happy by returning meaningful value. */
6133 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 return-value placement: rax by default, xmm0 for 16-byte
   non-complex scalar/vector modes and for SFmode/DFmode.
   NOTE(review): incomplete extract -- case labels of the size switch are
   partially elided.  */
6139 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6141 unsigned int regno = AX_REG;
6145 switch (GET_MODE_SIZE (mode))
6148 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6149 && !COMPLEX_MODE_P (mode))
6150 regno = FIRST_SSE_REG;
6154 if (mode == SFmode || mode == DFmode)
6155 regno = FIRST_SSE_REG;
6161 return gen_rtx_REG (orig_mode, regno);
/* Common return-value dispatcher: resolve FNTYPE_OR_DECL to a decl/type
   pair, then route by target and function-type ABI.  */
6165 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6166 enum machine_mode orig_mode, enum machine_mode mode)
6168 const_tree fn, fntype;
6171 if (fntype_or_decl && DECL_P (fntype_or_decl))
6172 fn = fntype_or_decl;
6173 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6175 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6176 return function_value_ms_64 (orig_mode, mode);
6177 else if (TARGET_64BIT)
6178 return function_value_64 (orig_mode, mode, valtype);
6180 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE's mode and delegate.  */
6184 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6185 bool outgoing ATTRIBUTE_UNUSED)
6187 enum machine_mode mode, orig_mode;
6189 orig_mode = TYPE_MODE (valtype);
6190 mode = type_natural_mode (valtype, NULL);
6191 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE: libcalls carry no type, so pass NULLs through.  */
6195 ix86_libcall_value (enum machine_mode mode)
6197 return ix86_function_value_1 (NULL, NULL, mode, mode);
6200 /* Return true iff type is returned in memory. */
/* 32-bit rule set.  NOTE(review): incomplete extract -- the size checks
   gating each vector branch and the scalar fallthrough are elided.  */
6202 static int ATTRIBUTE_UNUSED
6203 return_in_memory_32 (const_tree type, enum machine_mode mode)
6207 if (mode == BLKmode)
6210 size = int_size_in_bytes (type);
/* MS-compatible targets return small aggregates (<= 8 bytes) in regs.  */
6212 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6215 if (VECTOR_MODE_P (mode) || mode == TImode)
6217 /* User-created vectors small enough to fit in EAX. */
6221 /* MMX/3dNow values are returned in MM0,
6222 except when it doesn't exits. */
6224 return (TARGET_MMX ? 0 : 1);
6226 /* SSE values are returned in XMM0, except when it doesn't exist. */
6228 return (TARGET_SSE ? 0 : 1);
6230 /* AVX values are returned in YMM0, except when it doesn't exist. */
6232 return TARGET_AVX ? 0 : 1;
6241 /* OImode shouldn't be used directly. */
6242 gcc_assert (mode != OImode);
/* x86-64 SysV: in memory exactly when examine_argument cannot place the
   value in registers (i.e. it classified as MEMORY).  */
6247 static int ATTRIBUTE_UNUSED
6248 return_in_memory_64 (const_tree type, enum machine_mode mode)
6250 int needed_intregs, needed_sseregs;
6251 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: 16-byte non-complex scalar/vector values come back in
   xmm0; everything else must be exactly 1, 2, 4, or 8 bytes to use a
   register, otherwise it is returned in memory.  */
6254 static int ATTRIBUTE_UNUSED
6255 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6257 HOST_WIDE_INT size = int_size_in_bytes (type);
6259 /* __m128 is returned in xmm0. */
6260 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6261 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6264 /* Otherwise, the size must be exactly in [1248]. */
6265 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtargets may override entirely via
   SUBTARGET_RETURN_IN_MEMORY; otherwise dispatch by ABI.
   NOTE(review): incomplete extract -- #else/#endif and the TARGET_64BIT
   guard lines are elided.  */
6269 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6271 #ifdef SUBTARGET_RETURN_IN_MEMORY
6272 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6274 const enum machine_mode mode = type_natural_mode (type, NULL);
6278 if (ix86_function_type_abi (fntype) == MS_ABI)
6279 return return_in_memory_ms_64 (type, mode);
6281 return return_in_memory_64 (type, mode);
6284 return return_in_memory_32 (type, mode);
6288 /* Return false iff TYPE is returned in memory. This version is used
6289 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6290 but differs notably in that when MMX is available, 8-byte vectors
6291 are returned in memory, rather than in MMX registers. */
/* NOTE(review): incomplete extract -- the size comparisons inside the
   vector branch and the final return are elided.  */
6294 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6297 enum machine_mode mode = type_natural_mode (type, NULL);
6300 return return_in_memory_64 (type, mode);
6302 if (mode == BLKmode)
6305 size = int_size_in_bytes (type);
6307 if (VECTOR_MODE_P (mode))
6309 /* Return in memory only if MMX registers *are* available. This
6310 seems backwards, but it is consistent with the existing
6317 else if (mode == TImode)
6319 else if (mode == XFmode)
6325 /* When returning SSE vector types, we have a choice of either
6326 (1) being abi incompatible with a -march switch, or
6327 (2) generating an error.
6328 Given no good solution, I think the safest thing is one warning.
6329 The user won't be able to use -Werror, but....
6331 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6332 called in response to actually generating a caller or callee that
6333 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6334 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): incomplete extract -- warning-latch assignments and the
   final return are elided; only the warning conditions remain visible.  */
6337 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot latches: warn at most once per compilation for each case.  */
6339 static bool warnedsse, warnedmmx;
6341 if (!TARGET_64BIT && type)
6343 /* Look at the return type of the function, not the function type. */
6344 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6346 if (!TARGET_SSE && !warnedsse)
6349 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6352 warning (0, "SSE vector return without SSE enabled "
6357 if (!TARGET_MMX && !warnedmmx)
6359 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6362 warning (0, "MMX vector return without MMX enabled "
6372 /* Create the va_list data type. */
6374 /* Returns the calling convention specific va_list date type.
6375 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Builds the four-field SysV x86-64 __va_list_tag record (gp_offset,
   fp_offset, overflow_arg_area, reg_save_area); i386 and MS ABIs use a
   plain char*.  NOTE(review): incomplete extract -- the pointer-type
   arguments of the f_ovf/f_sav decls are elided.  */
6378 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6380 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6382 /* For i386 we use plain pointer to argument area. */
6383 if (!TARGET_64BIT || abi == MS_ABI)
6384 return build_pointer_type (char_type_node);
6386 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6387 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6389 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6390 unsigned_type_node);
6391 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6392 unsigned_type_node);
6393 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6395 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Expose the counter fields so the middle end can track va_list usage.  */
6398 va_list_gpr_counter_field = f_gpr;
6399 va_list_fpr_counter_field = f_fpr;
6401 DECL_FIELD_CONTEXT (f_gpr) = record;
6402 DECL_FIELD_CONTEXT (f_fpr) = record;
6403 DECL_FIELD_CONTEXT (f_ovf) = record;
6404 DECL_FIELD_CONTEXT (f_sav) = record;
6406 TREE_CHAIN (record) = type_decl;
6407 TYPE_NAME (record) = type_decl;
6408 TYPE_FIELDS (record) = f_gpr;
6409 TREE_CHAIN (f_gpr) = f_fpr;
6410 TREE_CHAIN (f_fpr) = f_ovf;
6411 TREE_CHAIN (f_ovf) = f_sav;
6413 layout_type (record);
6415 /* The correct type is an array type of one element. */
6416 return build_array_type (record, build_index_type (size_zero_node));
6419 /* Setup the builtin va_list data type and for 64-bit the additional
6420 calling convention specific va_list data types. */
/* NOTE(review): incomplete extract -- the TARGET_64BIT guard, the else
   branches' assignments of `t`, and the return are elided; the visible
   pairs of RECORD_TYPE checks handle the MS_ABI / SYSV_ABI cases.  */
6423 ix86_build_builtin_va_list (void)
6425 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6427 /* Initialize abi specific va_list builtin types. */
6431 if (ix86_abi == MS_ABI)
6433 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6434 if (TREE_CODE (t) != RECORD_TYPE)
6435 t = build_variant_type_copy (t);
6436 sysv_va_list_type_node = t;
6441 if (TREE_CODE (t) != RECORD_TYPE)
6442 t = build_variant_type_copy (t);
6443 sysv_va_list_type_node = t;
6445 if (ix86_abi != MS_ABI)
6447 t = ix86_build_builtin_va_list_abi (MS_ABI);
6448 if (TREE_CODE (t) != RECORD_TYPE)
6449 t = build_variant_type_copy (t);
6450 ms_va_list_type_node = t;
6455 if (TREE_CODE (t) != RECORD_TYPE)
6456 t = build_variant_type_copy (t);
6457 ms_va_list_type_node = t;
6464 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emits prologue RTL that spills unnamed-argument registers into the
   register save area: integer regs via plain moves, SSE regs via the
   sse_prologue_save computed-jump template.  NOTE(review): incomplete
   extract -- local declarations (save_area, set, mem, label, tmp_reg,
   nsse_reg, i) and a few expression tails are elided.  */
6467 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6476 int regparm = ix86_regparm;
/* Cross-ABI call: use the callee ABI's register-parameter count.  */
6478 if (cum->call_abi != ix86_abi)
6479 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6481 /* GPR size of varargs save area. */
6482 if (cfun->va_list_gpr_size)
6483 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6485 ix86_varargs_gpr_size = 0;
6487 /* FPR size of varargs save area. We don't need it if we don't pass
6488 anything in SSE registers. */
6489 if (cum->sse_nregs && cfun->va_list_fpr_size)
6490 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6492 ix86_varargs_fpr_size = 0;
6494 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6497 save_area = frame_pointer_rtx;
6498 set = get_varargs_alias_set ();
/* Spill each unnamed integer parameter register to its save-area slot.  */
6500 for (i = cum->regno;
6502 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6505 mem = gen_rtx_MEM (Pmode,
6506 plus_constant (save_area, i * UNITS_PER_WORD));
6507 MEM_NOTRAP_P (mem) = 1;
6508 set_mem_alias_set (mem, set);
6509 emit_move_insn (mem, gen_rtx_REG (Pmode,
6510 x86_64_int_parameter_registers[i]));
6513 if (ix86_varargs_fpr_size)
6515 /* Now emit code to save SSE registers. The AX parameter contains number
6516 of SSE parameter registers used to call this function. We use
6517 sse_prologue_save insn template that produces computed jump across
6518 SSE saves. We need some preparation work to get this working. */
6520 label = gen_label_rtx ();
6521 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6523 /* Compute address to jump to :
6524 label - eax*4 + nnamed_sse_arguments*4 Or
6525 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6526 tmp_reg = gen_reg_rtx (Pmode);
6527 nsse_reg = gen_reg_rtx (Pmode);
6528 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6529 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6530 gen_rtx_MULT (Pmode, nsse_reg,
6533 /* vmovaps is one byte longer than movaps. */
6535 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6536 gen_rtx_PLUS (Pmode, tmp_reg,
6542 gen_rtx_CONST (DImode,
6543 gen_rtx_PLUS (DImode,
6545 GEN_INT (cum->sse_regno
6546 * (TARGET_AVX ? 5 : 4)))));
6548 emit_move_insn (nsse_reg, label_ref);
6549 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6551 /* Compute address of memory block we save into. We always use pointer
6552 pointing 127 bytes after first byte to store - this is needed to keep
6553 instruction size limited by 4 bytes (5 bytes for AVX) with one
6554 byte displacement. */
6555 tmp_reg = gen_reg_rtx (Pmode);
6556 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6557 plus_constant (save_area,
6558 ix86_varargs_gpr_size + 127)));
6559 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6560 MEM_NOTRAP_P (mem) = 1;
6561 set_mem_alias_set (mem, set);
6562 set_mem_align (mem, BITS_PER_WORD);
6564 /* And finally do the dirty job! */
6565 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6566 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill each remaining named-register slot
   (rcx/rdx/r8/r9) into its caller-allocated home location on the stack.
   NOTE(review): incomplete extract -- declarations of i/mem/reg elided.  */
6571 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6573 alias_set_type set = get_varargs_alias_set ();
6576 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6580 mem = gen_rtx_MEM (Pmode,
6581 plus_constant (virtual_incoming_args_rtx,
6582 i * UNITS_PER_WORD));
6583 MEM_NOTRAP_P (mem) = 1;
6584 set_mem_alias_set (mem, set);
6586 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6587 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance past the last named
   argument (stdarg only), then dispatch to the ABI-specific worker.
   NOTE(review): incomplete extract -- the TARGET_64BIT early return and
   the next_cum copy are elided.  */
6592 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6593 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6596 CUMULATIVE_ARGS next_cum;
6599 /* This argument doesn't appear to be used anymore. Which is good,
6600 because the old code here didn't suppress rtl generation. */
6601 gcc_assert (!no_rtl);
6606 fntype = TREE_TYPE (current_function_decl);
6608 /* For varargs, we do not want to skip the dummy va_dcl argument.
6609 For stdargs, we do want to skip the last named argument. */
6611 if (stdarg_p (fntype))
6612 function_arg_advance (&next_cum, mode, type, 1);
6614 if (cum->call_abi == MS_ABI)
6615 setup_incoming_varargs_ms_64 (&next_cum);
6617 setup_incoming_varargs_64 (&next_cum);
6620 /* Checks if TYPE is of kind va_list char *. */
/* NOTE(review): incomplete extract -- the 32-bit early return and the
   `canonic` declaration are elided.  */
6623 is_va_list_char_pointer (tree type)
6627 /* For 32-bit it is always true. */
6630 canonic = ix86_canonical_va_list_type (type);
6631 return (canonic == ms_va_list_type_node
6632 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6635 /* Implement va_start. */
/* Initializes the four SysV x86-64 va_list fields (gp_offset, fp_offset,
   overflow area, register save area) from the current function's
   incoming-argument bookkeeping.  32-bit and char*-va_list targets fall
   back to the generic expander.  NOTE(review): incomplete extract --
   `type` declaration and a brace or two are elided.  */
6638 ix86_va_start (tree valist, rtx nextarg)
6640 HOST_WIDE_INT words, n_gpr, n_fpr;
6641 tree f_gpr, f_fpr, f_ovf, f_sav;
6642 tree gpr, fpr, ovf, sav, t;
6645 /* Only 64bit target needs something special. */
6646 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6648 std_expand_builtin_va_start (valist, nextarg);
6652 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6653 f_fpr = TREE_CHAIN (f_gpr);
6654 f_ovf = TREE_CHAIN (f_fpr);
6655 f_sav = TREE_CHAIN (f_ovf);
6657 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6658 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6659 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6660 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6661 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6663 /* Count number of gp and fp argument registers used. */
6664 words = crtl->args.info.words;
6665 n_gpr = crtl->args.info.regno;
6666 n_fpr = crtl->args.info.sse_regno;
6668 if (cfun->va_list_gpr_size)
6670 type = TREE_TYPE (gpr);
6671 t = build2 (MODIFY_EXPR, type,
6672 gpr, build_int_cst (type, n_gpr * 8));
6673 TREE_SIDE_EFFECTS (t) = 1;
6674 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the 8-byte GPR slots: 16 bytes per SSE reg.  */
6677 if (TARGET_SSE && cfun->va_list_fpr_size)
6679 type = TREE_TYPE (fpr);
6680 t = build2 (MODIFY_EXPR, type, fpr,
6681 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6682 TREE_SIDE_EFFECTS (t) = 1;
6683 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6686 /* Find the overflow area. */
6687 type = TREE_TYPE (ovf);
6688 t = make_tree (type, crtl->args.internal_arg_pointer);
6690 t = build2 (POINTER_PLUS_EXPR, type, t,
6691 size_int (words * UNITS_PER_WORD));
6692 t = build2 (MODIFY_EXPR, type, ovf, t);
6693 TREE_SIDE_EFFECTS (t) = 1;
6694 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6696 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6698 /* Find the register save area.
6699 Prologue of the function save it right above stack frame. */
6700 type = TREE_TYPE (sav);
6701 t = make_tree (type, frame_pointer_rtx);
6702 if (!ix86_varargs_gpr_size)
6703 t = build2 (POINTER_PLUS_EXPR, type, t,
6704 size_int (-8 * X86_64_REGPARM_MAX));
6705 t = build2 (MODIFY_EXPR, type, sav, t);
6706 TREE_SIDE_EFFECTS (t) = 1;
6707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6711 /* Implement va_arg. */
/* Gimplify a va_arg expression for the x86-64 SysV calling convention.
   Small arguments may live in the register save area (integer regs at
   8-byte slots, SSE regs at 16-byte slots); larger or misaligned ones
   come from the stack overflow area.  Emits GIMPLE into *PRE_P and
   returns a tree for the fetched value.
   NOTE(review): this listing is elided -- declarations, braces, and some
   statements between the numbered lines are not visible here.  */
6714 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6717 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6718 tree f_gpr, f_fpr, f_ovf, f_sav;
6719 tree gpr, fpr, ovf, sav, t;
6721 tree lab_false, lab_over = NULL_TREE;
6726 enum machine_mode nat_mode;
6729 /* Only 64bit target needs something special. */
6730 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6731 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Walk the four fields of the SysV va_list record:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6733 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6734 f_fpr = TREE_CHAIN (f_gpr);
6735 f_ovf = TREE_CHAIN (f_fpr);
6736 f_sav = TREE_CHAIN (f_ovf);
6738 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6739 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6740 valist = build_va_arg_indirect_ref (valist);
6741 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6742 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6743 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and
   dereferenced at the end (see the indirect_p test at the return).  */
6745 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6747 type = build_pointer_type (type);
6748 size = int_size_in_bytes (type);
6749 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6751 nat_mode = type_natural_mode (type, NULL);
6760 /* Unnamed 256bit vector mode parameters are passed on stack. */
6761 if (ix86_cfun_abi () == SYSV_ABI)
6768 container = construct_container (nat_mode, TYPE_MODE (type),
6769 type, 0, X86_64_REGPARM_MAX,
6770 X86_64_SSE_REGPARM_MAX, intreg,
6775 /* Pull the value out of the saved registers. */
6777 addr = create_tmp_var (ptr_type_node, "addr");
6778 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6782 int needed_intregs, needed_sseregs;
6784 tree int_addr, sse_addr;
6786 lab_false = create_artificial_label ();
6787 lab_over = create_artificial_label ();
6789 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value cannot be read directly from
   the save area, e.g. because its alignment exceeds the slot size.  */
6791 need_temp = (!REG_P (container)
6792 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6793 || TYPE_ALIGN (type) > 128));
6795 /* In case we are passing structure, verify that it is consecutive block
6796 on the register save area. If not we need to do moves. */
6797 if (!need_temp && !REG_P (container))
6799 /* Verify that all registers are strictly consecutive */
6800 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6804 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6806 rtx slot = XVECEXP (container, 0, i);
/* SSE save-area slots are 16 bytes apart.  */
6807 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6808 || INTVAL (XEXP (slot, 1)) != i * 16
6816 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6818 rtx slot = XVECEXP (container, 0, i);
/* Integer save-area slots are 8 bytes apart.  */
6819 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6820 || INTVAL (XEXP (slot, 1)) != i * 8
6832 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6833 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6834 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6835 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6838 /* First ensure that we fit completely in registers. */
6841 t = build_int_cst (TREE_TYPE (gpr),
6842 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6843 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6844 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6845 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6846 gimplify_and_add (t, pre_p);
6850 t = build_int_cst (TREE_TYPE (fpr),
6851 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6852 + X86_64_REGPARM_MAX * 8);
6853 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6854 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6855 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6856 gimplify_and_add (t, pre_p);
6859 /* Compute index to start of area used for integer regs. */
6862 /* int_addr = gpr + sav; */
6863 t = fold_convert (sizetype, gpr);
6864 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6865 gimplify_assign (int_addr, t, pre_p);
6869 /* sse_addr = fpr + sav; */
6870 t = fold_convert (sizetype, fpr);
6871 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6872 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register piece into a stack temporary so
   the caller sees one contiguous object.  */
6877 tree temp = create_tmp_var (type, "va_arg_tmp");
6880 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6881 gimplify_assign (addr, t, pre_p);
6883 for (i = 0; i < XVECLEN (container, 0); i++)
6885 rtx slot = XVECEXP (container, 0, i);
6886 rtx reg = XEXP (slot, 0);
6887 enum machine_mode mode = GET_MODE (reg);
6888 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6889 tree addr_type = build_pointer_type (piece_type);
6890 tree daddr_type = build_pointer_type_for_mode (piece_type,
6894 tree dest_addr, dest;
6896 if (SSE_REGNO_P (REGNO (reg)))
6898 src_addr = sse_addr;
6899 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6903 src_addr = int_addr;
6904 src_offset = REGNO (reg) * 8;
6906 src_addr = fold_convert (addr_type, src_addr);
6907 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6908 size_int (src_offset));
6909 src = build_va_arg_indirect_ref (src_addr);
6911 dest_addr = fold_convert (daddr_type, addr);
6912 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
6913 size_int (INTVAL (XEXP (slot, 1))));
6914 dest = build_va_arg_indirect_ref (dest_addr);
6916 gimplify_assign (dest, src, pre_p);
/* Advance gp_offset / fp_offset past the registers we consumed.  */
6922 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6923 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6924 gimplify_assign (gpr, t, pre_p);
6929 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6930 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6931 gimplify_assign (fpr, t, pre_p);
6934 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6936 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6939 /* ... otherwise out of the overflow area. */
6941 /* When we align parameter on stack for caller, if the parameter
6942 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6943 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6944 here with caller. */
6945 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6946 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6947 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6949 /* Care for on-stack alignment if needed. */
6950 if (arg_boundary <= 64
6951 || integer_zerop (TYPE_SIZE (type)))
/* ovf = (ovf + align - 1) & -align, i.e. round the overflow pointer
   up to the argument's boundary before reading from it.  */
6955 HOST_WIDE_INT align = arg_boundary / 8;
6956 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6957 size_int (align - 1));
6958 t = fold_convert (sizetype, t);
6959 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6961 t = fold_convert (TREE_TYPE (ovf), t);
6963 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6964 gimplify_assign (addr, t, pre_p);
6966 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6967 size_int (rsize * UNITS_PER_WORD));
6968 gimplify_assign (unshare_expr (ovf), t, pre_p);
6971 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6973 ptrtype = build_pointer_type (type);
6974 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, ADDR holds a pointer to a pointer,
   so an extra dereference is applied here.  */
6977 addr = build_va_arg_indirect_ref (addr);
6978 return build_va_arg_indirect_ref (addr);
6981 /* Return nonzero if OPNUM's MEM should be matched
6982 in movabs* patterns. */
/* INSN's pattern (or first element of a PARALLEL) must be a SET whose
   operand OPNUM is a MEM, possibly wrapped in SUBREGs.  Volatile MEMs
   are rejected unless volatile_ok is set.  */
6985 ix86_check_movabs (rtx insn, int opnum)
6989 set = PATTERN (insn);
6990 if (GET_CODE (set) == PARALLEL)
6991 set = XVECEXP (set, 0, 0);
6992 gcc_assert (GET_CODE (set) == SET);
6993 mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
6994 while (GET_CODE (mem) == SUBREG)
6995 mem = SUBREG_REG (mem);
6996 gcc_assert (MEM_P (mem));
6997 return (volatile_ok || !MEM_VOLATILE_P (mem));
7000 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with the five constants the x87 can
   load with dedicated instructions (fldlg2, fldln2, fldl2e, fldl2t,
   fldpi) and sets ext_80387_constants_init.  Idempotent.  */
7003 init_ext_80387_constants (void)
7005 static const char * cst[5] =
7007 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7008 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7009 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7010 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7011 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7015 for (i = 0; i < 5; i++)
7017 real_from_string (&ext_80387_constants_table[i], cst[i]);
7018 /* Ensure each constant is rounded to XFmode precision. */
7019 real_convert (&ext_80387_constants_table[i],
7020 XFmode, &ext_80387_constants_table[i]);
7023 ext_80387_constants_init = 1;
7026 /* Return true if the constant is something that can be loaded with
7027 a special instruction. */
/* Classifies X for x87 constant loads.  NOTE(review): return codes
   (fldz/fld1 vs. the fldlg2.. table, and the negated forms) are not
   all visible in this elided listing; see standard_80387_constant_opcode
   for the consumers of the returned index.  */
7030 standard_80387_constant_p (rtx x)
7032 enum machine_mode mode = GET_MODE (x);
7036 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7039 if (x == CONST0_RTX (mode))
7041 if (x == CONST1_RTX (mode))
7044 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7046 /* For XFmode constants, try to find a special 80387 instruction when
7047 optimizing for size or on those CPUs that benefit from them. */
7049 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
7053 if (! ext_80387_constants_init)
7054 init_ext_80387_constants ();
7056 for (i = 0; i < 5; i++)
7057 if (real_identical (&r, &ext_80387_constants_table[i]))
7061 /* Load of the constant -0.0 or -1.0 will be split as
7062 fldz;fchs or fld1;fchs sequence. */
7063 if (real_isnegzero (&r))
7065 if (real_identical (&r, &dconstm1))
7071 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_80387_constant_p to the x87
   mnemonic (fldz, fld1, fldlg2, ...).  The switch cases are elided in
   this listing.  */
7075 standard_80387_constant_opcode (rtx x)
7077 switch (standard_80387_constant_p (x))
7101 /* Return the CONST_DOUBLE representing the 80387 constant that is
7102 loaded by the specified special instruction. The argument IDX
7103 matches the return value from standard_80387_constant_p. */
7106 standard_80387_constant_rtx (int idx)
/* Lazily build the constant table before indexing into it.  */
7110 if (! ext_80387_constants_init)
7111 init_ext_80387_constants ();
7127 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7131 /* Return 1 if mode is a valid mode for sse. */
/* Body (a switch over 128-bit SSE modes, presumably) is elided in this
   listing -- TODO confirm against the full source.  */
7133 standard_sse_mode_p (enum machine_mode mode)
7150 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7151 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7152 modes and AVX is enabled. */
/* Negative return values (-2/-3) flag an all-ones constant whose
   required ISA extension (SSE2/AVX) is not enabled.  */
7155 standard_sse_constant_p (rtx x)
7157 enum machine_mode mode = GET_MODE (x);
7159 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7161 if (vector_all_ones_operand (x, mode))
7163 if (standard_sse_mode_p (mode))
7164 return TARGET_SSE2 ? 2 : -2;
7165 else if (VALID_AVX256_REG_MODE (mode))
7166 return TARGET_AVX ? 3 : -3;
7172 /* Return the opcode of the special instruction to be used to load
/* Emits xor (all-zeros) or pcmpeq (all-ones) idioms; the AVX (v-prefixed)
   three-operand forms are used when TARGET_AVX.  Template text depends on
   the insn's mode attribute.  */
7176 standard_sse_constant_opcode (rtx insn, rtx x)
7178 switch (standard_sse_constant_p (x))
7181 switch (get_attr_mode (insn))
7184 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7186 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7188 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7190 return "vxorps\t%x0, %x0, %x0";
7192 return "vxorpd\t%x0, %x0, %x0";
7194 return "vpxor\t%x0, %x0, %x0";
7200 switch (get_attr_mode (insn))
7205 return "vpcmpeqd\t%0, %0, %0";
7211 return "pcmpeqd\t%0, %0";
7216 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: 'E' entries are vectors
   (walk each element), 'e' entries are sub-expressions.  */
7219 symbolic_reference_mentioned_p (rtx op)
7224 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7227 fmt = GET_RTX_FORMAT (GET_CODE (op));
7228 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7234 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7235 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7239 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7246 /* Return 1 if it is appropriate to emit `ret' instructions in the
7247 body of a function. Do this only if the epilogue is simple, needing a
7248 couple of insns. Prior to reloading, we can't tell how many registers
7249 must be saved, so return 0 then. Return 0 if there is no frame
7250 marker to de-allocate. */
7253 ix86_can_use_return_insn_p (void)
7255 struct ix86_frame frame;
7257 if (! reload_completed || frame_pointer_needed)
7260 /* Don't allow more than 32 pop, since that's all we can do
7261 with one instruction. */
7262 if (crtl->args.pops_args
7263 && crtl->args.size >= 32768)
/* A bare `ret' is only valid when nothing needs deallocating or
   restoring: no locals and no saved GP/SSE registers.  */
7266 ix86_compute_frame_layout (&frame);
7267 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7270 /* Value should be nonzero if functions must have frame pointers.
7271 Zero means the frame pointer need not be set up (and parms may
7272 be accessed via the stack pointer) in functions that seem suitable. */
7275 ix86_frame_pointer_required (void)
7277 /* If we accessed previous frames, then the generated code expects
7278 to be able to access the saved ebp value in our frame. */
7279 if (cfun->machine->accesses_prev_frame)
7282 /* Several x86 os'es need a frame pointer for other reasons,
7283 usually pertaining to setjmp. */
7284 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7287 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7288 the frame pointer by default. Turn it back on now if we've not
7289 got a leaf function. */
7290 if (TARGET_OMIT_LEAF_FRAME_POINTER
7291 && (!current_function_is_leaf
7292 || ix86_current_function_calls_tls_descriptor))
7301 /* Record that the current function accesses previous call frames. */
/* Sets the flag consulted by ix86_frame_pointer_required above.  */
7304 ix86_setup_frame_addresses (void)
7306 cfun->machine->accesses_prev_frame = 1;
/* Whether PC-thunk definitions can be emitted as hidden link-once
   (COMDAT) symbols instead of plain local labels.  */
7309 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7310 # define USE_HIDDEN_LINKONCE 1
7312 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a get_pc thunk has been requested;
   bit N set means a thunk for register N must be emitted at file end.  */
7315 static int pic_labels_used;
7317 /* Fills in the label name that should be used for a pc thunk for
7318 the given register. */
/* NAME must have room for 32 bytes.  32-bit only: 64-bit code uses
   RIP-relative addressing and never needs these thunks.  */
7321 get_pc_thunk_name (char name[32], unsigned int regno)
7323 gcc_assert (!TARGET_64BIT);
7325 if (USE_HIDDEN_LINKONCE)
7326 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7328 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7332 /* This function generates code for -fpic that loads %ebx with
7333 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: emits one get_pc thunk per register flagged
   in pic_labels_used.  Each thunk is `mov (%esp), %reg; ret`, i.e. it
   copies its own return address into REG.  Section/visibility handling
   differs for Mach-O, hidden-linkonce ELF, and plain text section.  */
7336 ix86_file_end (void)
7341 for (regno = 0; regno < 8; ++regno)
7345 if (! ((pic_labels_used >> regno) & 1))
7348 get_pc_thunk_name (name, regno);
/* Mach-O path: weak definition in the coalesced text section.  */
7353 switch_to_section (darwin_sections[text_coal_section]);
7354 fputs ("\t.weak_definition\t", asm_out_file);
7355 assemble_name (asm_out_file, name);
7356 fputs ("\n\t.private_extern\t", asm_out_file);
7357 assemble_name (asm_out_file, name);
7358 fputs ("\n", asm_out_file);
7359 ASM_OUTPUT_LABEL (asm_out_file, name);
7363 if (USE_HIDDEN_LINKONCE)
7367 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7369 TREE_PUBLIC (decl) = 1;
7370 TREE_STATIC (decl) = 1;
7371 DECL_ONE_ONLY (decl) = 1;
7373 (*targetm.asm_out.unique_section) (decl, 0);
7374 switch_to_section (get_named_section (decl, NULL, 0));
7376 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7377 fputs ("\t.hidden\t", asm_out_file);
7378 assemble_name (asm_out_file, name);
7379 fputc ('\n', asm_out_file);
7380 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7384 switch_to_section (text_section);
7385 ASM_OUTPUT_LABEL (asm_out_file, name);
/* The thunk body: load the word at %esp (the return address) into
   the target register, then return.  */
7388 xops[0] = gen_rtx_REG (Pmode, regno);
7389 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7390 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7391 output_asm_insn ("ret", xops);
7394 if (NEED_INDICATE_EXEC_STACK)
7395 file_end_indicate_exec_stack ();
7398 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT base address into DEST.
   Three strategies: VxWorks RTP (load via GOTT_BASE/GOTT_INDEX),
   call/pop (when deep branch prediction is off or -fno-pic), and a
   call to the per-register get_pc thunk (registered here via
   pic_labels_used).  LABEL, when non-null, anchors the pc-relative
   offset arithmetic.  */
7401 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7407 if (TARGET_VXWORKS_RTP && flag_pic)
7409 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7410 xops[2] = gen_rtx_MEM (Pmode,
7411 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7412 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7414 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7415 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7416 an unadorned address. */
7417 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7418 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7419 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7423 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7425 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call/pop sequence: the call pushes the next address, which the
   pop then retrieves into DEST.  */
7427 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
7430 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7432 output_asm_insn ("call\t%a2", xops);
7435 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7436 is what will be referenced by the Mach-O PIC subsystem. */
7438 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7441 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7442 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7445 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk path: remember that a thunk for this register is needed so
   ix86_file_end will emit it.  */
7450 get_pc_thunk_name (name, REGNO (dest));
7451 pic_labels_used |= 1 << REGNO (dest);
7453 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7454 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7455 output_asm_insn ("call\t%X2", xops);
7456 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7457 is what will be referenced by the Mach-O PIC subsystem. */
7460 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7462 targetm.asm_out.internal_label (asm_out_file, "L",
7463 CODE_LABEL_NUMBER (label));
7470 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7471 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7473 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7478 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg), i.e. the RTL form of a push.
   NOTE(review): the function signature line is elided from this
   listing.  */
7483 return gen_rtx_SET (VOIDmode,
7485 gen_rtx_PRE_DEC (Pmode,
7486 stack_pointer_rtx)),
7490 /* Return >= 0 if there is an unused call-clobbered register available
7491 for the entire function. */
/* Only meaningful in leaf, non-profiled functions that don't call TLS
   descriptors; scans %ax..%dx (regnos 2..0) for a register that is
   never live and not the DRAP.  Returns INVALID_REGNUM on failure.  */
7494 ix86_select_alt_pic_regnum (void)
7496 if (current_function_is_leaf && !crtl->profile
7497 && !ix86_current_function_calls_tls_descriptor)
7500 /* Can't use the same register for both PIC and DRAP. */
7502 drap = REGNO (crtl->drap_reg);
7505 for (i = 2; i >= 0; --i)
7506 if (i != drap && !df_regs_ever_live_p (i))
7510 return INVALID_REGNUM;
7513 /* Return 1 if we need to save REGNO. */
/* MAYBE_EH_RETURN additionally forces saving of the EH return data
   registers when the function calls __builtin_eh_return.  */
7515 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved whenever it is actually used, unless
   an alternate call-clobbered register can carry the PIC base.  */
7517 if (pic_offset_table_rtx
7518 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7519 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7521 || crtl->calls_eh_return
7522 || crtl->uses_const_pool))
7524 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7529 if (crtl->calls_eh_return && maybe_eh_return)
7534 unsigned test = EH_RETURN_DATA_REGNO (i);
7535 if (test == INVALID_REGNUM)
7543 && regno == REGNO (crtl->drap_reg))
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is set up by the prologue.  */
7546 return (df_regs_ever_live_p (regno)
7547 && !call_used_regs[regno]
7548 && !fixed_regs[regno]
7549 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7552 /* Return number of saved general purpose registers. */
7555 ix86_nsaved_regs (void)
7560 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7561 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7566 /* Return number of saved SSE registers. */
/* Only the MS ABI has callee-saved SSE registers; SysV returns 0.  */
7569 ix86_nsaved_sseregs (void)
7574 if (ix86_cfun_abi () != MS_ABI)
7576 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7577 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7582 /* Given FROM and TO register numbers, say whether this elimination is
7583 allowed. If stack alignment is needed, we can only replace argument
7584 pointer with hard frame pointer, or replace frame pointer with stack
7585 pointer. Otherwise, frame pointer elimination is automatically
7586 handled and all other eliminations are valid. */
7589 ix86_can_eliminate (int from, int to)
7591 if (stack_realign_fp)
7592 return ((from == ARG_POINTER_REGNUM
7593 && to == HARD_FRAME_POINTER_REGNUM)
7594 || (from == FRAME_POINTER_REGNUM
7595 && to == STACK_POINTER_REGNUM));
7597 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7600 /* Return the offset between two registers, one to be eliminated, and the other
7601 its replacement, at the start of a routine. */
/* Offsets are taken from the freshly computed frame layout; the four
   valid (FROM, TO) pairs mirror ix86_can_eliminate.  */
7604 ix86_initial_elimination_offset (int from, int to)
7606 struct ix86_frame frame;
7607 ix86_compute_frame_layout (&frame);
7609 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7610 return frame.hard_frame_pointer_offset;
7611 else if (from == FRAME_POINTER_REGNUM
7612 && to == HARD_FRAME_POINTER_REGNUM)
7613 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7616 gcc_assert (to == STACK_POINTER_REGNUM);
7618 if (from == ARG_POINTER_REGNUM)
7619 return frame.stack_pointer_offset;
7621 gcc_assert (from == FRAME_POINTER_REGNUM);
7622 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7626 /* In a dynamically-aligned function, we can't know the offset from
7627 stack pointer to frame pointer, so we must ensure that setjmp
7628 eliminates fp against the hard fp (%ebp) rather than trying to
7629 index from %esp up to the top of the frame across a gap that is
7630 of unknown (at compile-time) size. */
7632 ix86_builtin_setjmp_frame_value (void)
7634 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7637 /* Fill structure ix86_frame about frame of currently computed function. */
/* Lays the frame out top to bottom: return address / saved %ebp, GP
   register save area, (aligned) SSE save area, varargs save area,
   aligned locals, outgoing arguments, final alignment padding.  Also
   decides between push-based and move-based register saving and
   carves out the red zone where permitted.
   NOTE(review): this listing is elided; the trailing fprintf block is
   presumably guarded by a debug #ifdef not visible here.  */
7640 ix86_compute_frame_layout (struct ix86_frame *frame)
7642 HOST_WIDE_INT total_size;
7643 unsigned int stack_alignment_needed;
7644 HOST_WIDE_INT offset;
7645 unsigned int preferred_alignment;
7646 HOST_WIDE_INT size = get_frame_size ();
7648 frame->nregs = ix86_nsaved_regs ();
7649 frame->nsseregs = ix86_nsaved_sseregs ();
7652 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7653 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7655 /* MS ABI seems to require stack alignment to be always 16 except for function
7657 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7659 preferred_alignment = 16;
7660 stack_alignment_needed = 16;
7661 crtl->preferred_stack_boundary = 128;
7662 crtl->stack_alignment_needed = 128;
7665 gcc_assert (!size || stack_alignment_needed);
7666 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7667 gcc_assert (preferred_alignment <= stack_alignment_needed);
7669 /* During reload iteration the amount of registers saved can change.
7670 Recompute the value as needed. Do not recompute when amount of registers
7671 didn't change as reload does multiple calls to the function and does not
7672 expect the decision to change within single iteration. */
7673 if (!optimize_function_for_size_p (cfun)
7674 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7676 int count = frame->nregs;
7678 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7679 /* The fast prologue uses move instead of push to save registers. This
7680 is significantly longer, but also executes faster as modern hardware
7681 can execute the moves in parallel, but can't do that for push/pop.
7683 Be careful about choosing what prologue to emit: When function takes
7684 many instructions to execute we may use slow version as well as in
7685 case function is known to be outside hot spot (this is known with
7686 feedback only). Weight the size of function by number of registers
7687 to save as it is cheap to use one or two push instructions but very
7688 slow to use many of them. */
7690 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7691 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7692 || (flag_branch_probabilities
7693 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7694 cfun->machine->use_fast_prologue_epilogue = false;
7696 cfun->machine->use_fast_prologue_epilogue
7697 = !expensive_function_p (count);
7699 if (TARGET_PROLOGUE_USING_MOVE
7700 && cfun->machine->use_fast_prologue_epilogue)
7701 frame->save_regs_using_mov = true;
7703 frame->save_regs_using_mov = false;
7706 /* Skip return address and saved base pointer. */
7707 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7709 frame->hard_frame_pointer_offset = offset;
7711 /* Set offset to aligned because the realigned frame starts from
7713 if (stack_realign_fp)
7714 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7716 /* Register save area */
7717 offset += frame->nregs * UNITS_PER_WORD;
7719 /* Align SSE reg save area. */
7720 if (frame->nsseregs)
7721 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7723 frame->padding0 = 0;
7725 /* SSE register save area. */
7726 offset += frame->padding0 + frame->nsseregs * 16;
7729 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7730 offset += frame->va_arg_size;
7732 /* Align start of frame for local function. */
7733 frame->padding1 = ((offset + stack_alignment_needed - 1)
7734 & -stack_alignment_needed) - offset;
7736 offset += frame->padding1;
7738 /* Frame pointer points here. */
7739 frame->frame_pointer_offset = offset;
7743 /* Add outgoing arguments area. Can be skipped if we eliminated
7744 all the function calls as dead code.
7745 Skipping is however impossible when function calls alloca. Alloca
7746 expander assumes that last crtl->outgoing_args_size
7747 of stack frame are unused. */
7748 if (ACCUMULATE_OUTGOING_ARGS
7749 && (!current_function_is_leaf || cfun->calls_alloca
7750 || ix86_current_function_calls_tls_descriptor))
7752 offset += crtl->outgoing_args_size;
7753 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7756 frame->outgoing_arguments_size = 0;
7758 /* Align stack boundary. Only needed if we're calling another function
7760 if (!current_function_is_leaf || cfun->calls_alloca
7761 || ix86_current_function_calls_tls_descriptor)
7762 frame->padding2 = ((offset + preferred_alignment - 1)
7763 & -preferred_alignment) - offset;
7765 frame->padding2 = 0;
7767 offset += frame->padding2;
7769 /* We've reached end of stack frame. */
7770 frame->stack_pointer_offset = offset;
7772 /* Size prologue needs to allocate. */
7773 frame->to_allocate =
7774 (size + frame->padding1 + frame->padding2
7775 + frame->outgoing_arguments_size + frame->va_arg_size);
7777 if ((!frame->to_allocate && frame->nregs <= 1)
7778 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7779 frame->save_regs_using_mov = false;
/* Red zone: 128 bytes below %rsp usable without adjustment, only for
   leaf SysV functions whose stack pointer never moves.  */
7781 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7782 && current_function_is_leaf
7783 && !ix86_current_function_calls_tls_descriptor)
7785 frame->red_zone_size = frame->to_allocate;
7786 if (frame->save_regs_using_mov)
7787 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7788 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7789 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7792 frame->red_zone_size = 0;
7793 frame->to_allocate -= frame->red_zone_size;
7794 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  */
7796 fprintf (stderr, "\n");
7797 fprintf (stderr, "size: %ld\n", (long)size);
7798 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7799 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7800 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7801 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7802 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7803 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7804 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7805 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7806 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7807 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7808 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7809 (long)frame->hard_frame_pointer_offset);
7810 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7811 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7812 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7813 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7817 /* Emit code to save registers in the prologue. */
/* Push-based save: one push per callee-saved GP register, highest
   regno first; each insn is marked frame-related for unwind info.  */
7820 ix86_emit_save_regs (void)
7825 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7826 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7828 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7829 RTX_FRAME_RELATED_P (insn) = 1;
7833 /* Emit code to save registers using MOV insns. First register
7834 is restored from POINTER + OFFSET. */
/* NOTE(review): "restored" in the comment above follows the original;
   this routine SAVES GP registers to successive word slots.  */
7836 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7841 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7842 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7844 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7846 gen_rtx_REG (Pmode, regno));
7847 RTX_FRAME_RELATED_P (insn) = 1;
7848 offset += UNITS_PER_WORD;
7852 /* Emit code to save registers using MOV insns. First register
7853 is restored from POINTER + OFFSET. */
/* SSE counterpart of the routine above: saves each callee-saved SSE
   register as a 16-byte TImode store, marked as 128-bit aligned.  */
7855 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7861 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7862 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7864 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7865 set_mem_align (mem, 128);
7866 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7867 RTX_FRAME_RELATED_P (insn) = 1;
7872 /* Expand prologue or epilogue stack adjustment.
7873 The pattern exist to put a dependency on all ebp-based memory accesses.
7874 STYLE should be negative if instructions should be marked as frame related,
7875 zero if %r11 register is live and cannot be freely used and positive
7879 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7884 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit: the offset may not fit in a sign-extended 32-bit immediate.  */
7885 else if (x86_64_immediate_operand (offset, DImode))
7886 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7890 /* r11 is used by indirect sibcall return as well, set before the
7891 epilogue and used after the epilogue. ATM indirect sibcall
7892 shouldn't be used together with huge frame sizes in one
7893 function because of the frame_size check in sibcall.c. */
/* Materialize the huge offset in %r11 first, then adjust through it.  */
7895 r11 = gen_rtx_REG (DImode, R11_REG);
7896 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7898 RTX_FRAME_RELATED_P (insn) = 1;
7899 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7903 RTX_FRAME_RELATED_P (insn) = 1;
7906 /* Find an available register to be used as dynamic realign argument
7907 pointer register. Such a register will be written in prologue and
7908 used in begin of body, so it must not be
7909 1. parameter passing register.
7911 We reuse static-chain register if it is available. Otherwise, we
7912 use DI for i386 and R13 for x86-64. We chose R13 since it has
7915 Return: the regno of chosen register. */
7918 find_drap_reg (void)
7920 tree decl = cfun->decl;
7924 /* Use R13 for nested function or function need static chain.
7925 Since function with tail call may use any caller-saved
7926 registers in epilogue, DRAP must not use caller-saved
7927 register in such case. */
7928 if ((decl_function_context (decl)
7929 && !DECL_NO_STATIC_CHAIN (decl))
7930 || crtl->tail_call_emit)
7937 /* Use DI for nested function or function need static chain.
7938 Since function with tail call may use any caller-saved
7939 registers in epilogue, DRAP must not use caller-saved
7940 register in such case. */
7941 if ((decl_function_context (decl)
7942 && !DECL_NO_STATIC_CHAIN (decl))
7943 || crtl->tail_call_emit)
7946 /* Reuse static chain register if it isn't used for parameter
7948 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7949 && !lookup_attribute ("fastcall",
7950 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7957 /* Update incoming stack boundary and estimated stack alignment. */
/* Resolves ix86_incoming_stack_boundary from (in priority order) the
   command line, the force_align_arg_pointer attribute, the parameter
   stack boundary, and the special case of main().  Also bumps the
   estimated alignment for 64-bit varargs register saves.  */
7960 ix86_update_stack_boundary (void)
7962 /* Prefer the one specified at command line. */
7963 ix86_incoming_stack_boundary
7964 = (ix86_user_incoming_stack_boundary
7965 ? ix86_user_incoming_stack_boundary
7966 : ix86_default_incoming_stack_boundary);
7968 /* Incoming stack alignment can be changed on individual functions
7969 via force_align_arg_pointer attribute. We use the smallest
7970 incoming stack boundary. */
7971 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7972 && lookup_attribute (ix86_force_align_arg_pointer_string,
7973 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7974 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7976 /* The incoming stack frame has to be aligned at least at
7977 parm_stack_boundary. */
7978 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
7979 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
7981 /* Stack at entrance of main is aligned by runtime. We use the
7982 smallest incoming stack boundary. */
7983 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7984 && DECL_NAME (current_function_decl)
7985 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7986 && DECL_FILE_SCOPE_P (current_function_decl))
7987 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7989 /* x86_64 vararg needs 16byte stack alignment for register save
7993 && crtl->stack_alignment_estimated < 128)
7994 crtl->stack_alignment_estimated = 128;
7997 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7998 needed or an rtx for DRAP otherwise. */
8001 ix86_get_drap_rtx (void)
8003 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8004 crtl->need_drap = true;
8006 if (stack_realign_drap)
8008 /* Assign DRAP to vDRAP and returns vDRAP */
8009 unsigned int regno = find_drap_reg ();
8014 arg_ptr = gen_rtx_REG (Pmode, regno);
8015 crtl->drap_reg = arg_ptr;
/* The copy into a virtual register happens right after function
   entry, before any other code runs.  */
8018 drap_vreg = copy_to_reg (arg_ptr);
8022 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8023 RTX_FRAME_RELATED_P (insn) = 1;
8030 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* Returns the virtual incoming-args pointer; DRAP adjustments, if any,
   are handled elsewhere (see ix86_get_drap_rtx).  */
8033 ix86_internal_arg_pointer (void)
8035 return virtual_incoming_args_rtx;
8038 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8039 This is called from dwarf2out.c to emit call frame instructions
8040 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8042 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8044 rtx unspec = SET_SRC (pattern);
8045 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the unspec code; the switch header is elided in this
   extract.  */
8049 case UNSPEC_REG_SAVE:
8050 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8051 SET_DEST (pattern));
8053 case UNSPEC_DEF_CFA:
8054 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8055 INTVAL (XVECEXP (unspec, 0, 0)));
8062 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8063 to be generated in correct form. */
8065 ix86_finalize_stack_realign_flags (void)
8067 /* Check if stack realign is really needed after reload, and
8068 stores result in cfun */
8069 unsigned int incoming_stack_boundary
8070 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8071 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
8072 unsigned int stack_realign = (incoming_stack_boundary
8073 < (current_function_is_leaf
8074 ? crtl->max_used_stack_slot_alignment
8075 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not flip: both prologue and
   epilogue expansion rely on the same answer.  */
8077 if (crtl->stack_realign_finalized)
8079 /* After stack_realign_needed is finalized, we can't no longer
8081 gcc_assert (crtl->stack_realign_needed == stack_realign);
8085 crtl->stack_realign_needed = stack_realign;
8086 crtl->stack_realign_finalized = true;
8090 /* Expand the prologue into a bunch of separate insns. */
8093 ix86_expand_prologue (void)
8097 struct ix86_frame frame;
8098 HOST_WIDE_INT allocate;
8100 ix86_finalize_stack_realign_flags ();
8102 /* DRAP should not coexist with stack_realign_fp */
8103 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8105 ix86_compute_frame_layout (&frame);
8107 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8108 of DRAP is needed and stack realignment is really needed after reload */
8109 if (crtl->drap_reg && crtl->stack_realign_needed)
8112 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8113 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8114 ? 0 : UNITS_PER_WORD);
8116 gcc_assert (stack_realign_drap);
8118 /* Grab the argument pointer. */
8119 x = plus_constant (stack_pointer_rtx,
8120 (UNITS_PER_WORD + param_ptr_offset));
8123 /* Only need to push parameter pointer reg if it is caller
8125 if (!call_used_regs[REGNO (crtl->drap_reg)])
8127 /* Push arg pointer reg */
8128 insn = emit_insn (gen_push (y));
8129 RTX_FRAME_RELATED_P (insn) = 1;
8132 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8133 RTX_FRAME_RELATED_P (insn) = 1;
8135 /* Align the stack. */
8136 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8138 GEN_INT (-align_bytes)));
8139 RTX_FRAME_RELATED_P (insn) = 1;
8141 /* Replicate the return address on the stack so that return
8142 address can be reached via (argp - 1) slot. This is needed
8143 to implement macro RETURN_ADDR_RTX and intrinsic function
8144 expand_builtin_return_addr etc. */
8146 x = gen_frame_mem (Pmode,
8147 plus_constant (x, -UNITS_PER_WORD));
8148 insn = emit_insn (gen_push (x));
8149 RTX_FRAME_RELATED_P (insn) = 1;
8152 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8153 slower on all targets. Also sdb doesn't like it. */
8155 if (frame_pointer_needed)
8157 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8158 RTX_FRAME_RELATED_P (insn) = 1;
8160 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8161 RTX_FRAME_RELATED_P (insn) = 1;
8164 if (stack_realign_fp)
8166 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8167 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8169 /* Align the stack. */
8170 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8172 GEN_INT (-align_bytes)));
8173 RTX_FRAME_RELATED_P (insn) = 1;
/* Total frame bytes to allocate: locals + SSE save area + padding;
   GPR save slots are added below after possible push-based saving.  */
8176 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8178 if (!frame.save_regs_using_mov)
8179 ix86_emit_save_regs ();
8181 allocate += frame.nregs * UNITS_PER_WORD;
8183 /* When using red zone we may start register saving before allocating
8184 the stack frame saving one cycle of the prologue. However I will
8185 avoid doing this if I am going to have to probe the stack since
8186 at least on x86_64 the stack probe can turn into a call that clobbers
8187 a red zone location */
8188 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8189 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8190 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8191 && !crtl->stack_realign_needed)
8192 ? hard_frame_pointer_rtx
8193 : stack_pointer_rtx,
8194 -frame.nregs * UNITS_PER_WORD);
8198 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8199 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8200 GEN_INT (-allocate), -1);
/* Large allocations with stack probing go through the allocate_stack
   worker, which probes each page as it extends the stack.  */
8203 /* Only valid for Win32. */
8204 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8208 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8210 if (cfun->machine->call_abi == MS_ABI)
8213 eax_live = ix86_eax_live_at_start_p ();
/* %eax may carry an incoming argument; preserve it around the probe
   call by pushing it and restoring from the frame afterwards.  */
8217 emit_insn (gen_push (eax));
8218 allocate -= UNITS_PER_WORD;
8221 emit_move_insn (eax, GEN_INT (allocate));
8224 insn = gen_allocate_stack_worker_64 (eax, eax);
8226 insn = gen_allocate_stack_worker_32 (eax, eax);
8227 insn = emit_insn (insn);
8228 RTX_FRAME_RELATED_P (insn) = 1;
8229 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8230 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8231 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8232 t, REG_NOTES (insn));
8236 if (frame_pointer_needed)
8237 t = plus_constant (hard_frame_pointer_rtx,
8240 - frame.nregs * UNITS_PER_WORD);
8242 t = plus_constant (stack_pointer_rtx, allocate);
8243 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Registers not yet saved by pushes above are now stored with MOVs
   into the freshly allocated frame.  */
8247 if (frame.save_regs_using_mov
8248 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8249 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8251 if (!frame_pointer_needed
8252 || !frame.to_allocate
8253 || crtl->stack_realign_needed)
8254 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8256 + frame.nsseregs * 16 + frame.padding0);
8258 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8259 -frame.nregs * UNITS_PER_WORD);
8261 if (!frame_pointer_needed
8262 || !frame.to_allocate
8263 || crtl->stack_realign_needed)
8264 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8267 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8268 - frame.nregs * UNITS_PER_WORD
8269 - frame.nsseregs * 16
/* Set up the PIC register if this function needs one.  */
8272 pic_reg_used = false;
8273 if (pic_offset_table_rtx
8274 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8277 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8279 if (alt_pic_reg_used != INVALID_REGNUM)
8280 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8282 pic_reg_used = true;
8289 if (ix86_cmodel == CM_LARGE_PIC)
8291 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8292 rtx label = gen_label_rtx ();
8294 LABEL_PRESERVE_P (label) = 1;
8295 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8296 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8297 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8298 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8299 pic_offset_table_rtx, tmp_reg));
8302 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8305 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8308 /* In the pic_reg_used case, make sure that the got load isn't deleted
8309 when mcount needs it. Blockage to avoid call movement across mcount
8310 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8312 if (crtl->profile && pic_reg_used)
8313 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8315 if (crtl->drap_reg && !crtl->stack_realign_needed)
8317 /* vDRAP is setup but after reload it turns out stack realign
8318 isn't necessary, here we will emit prologue to setup DRAP
8319 without stack realign adjustment */
8320 int drap_bp_offset = UNITS_PER_WORD * 2;
8321 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8322 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8325 /* Prevent instructions from being scheduled into register save push
8326 sequence when access to the redzone area is done through frame pointer.
8327 The offset between the frame pointer and the stack pointer is calculated
8328 relative to the value of the stack pointer at the end of the function
8329 prologue, and moving instructions that access redzone area via frame
8330 pointer inside push sequence violates this assumption. */
8331 if (frame_pointer_needed && frame.red_zone_size)
8332 emit_insn (gen_memory_blockage ());
8334 /* Emit cld instruction if stringops are used in the function. */
8335 if (TARGET_CLD && ix86_current_function_needs_cld)
8336 emit_insn (gen_cld ());
8339 /* Emit code to restore saved registers using MOV insns. First register
8340 is restored from POINTER + OFFSET. */
8342 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8343 int maybe_eh_return)
8346 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk all hard registers; only general (non-SSE) registers that were
   saved for this function are restored here.  */
8348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8349 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8351 /* Ensure that adjust_address won't be forced to produce pointer
8352 out of range allowed by x86-64 instruction set. */
8353 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a signed 32-bit displacement: materialize
   POINTER + OFFSET in %r11 and address relative to that.  */
8357 r11 = gen_rtx_REG (DImode, R11_REG);
8358 emit_move_insn (r11, GEN_INT (offset));
8359 emit_insn (gen_adddi3 (r11, r11, pointer));
8360 base_address = gen_rtx_MEM (Pmode, r11);
8363 emit_move_insn (gen_rtx_REG (Pmode, regno),
8364 adjust_address (base_address, Pmode, offset));
8365 offset += UNITS_PER_WORD;
8369 /* Emit code to restore saved SSE registers using MOV insns. First
8370 register is restored from POINTER + OFFSET. */
8372 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8373 int maybe_eh_return)
8376 rtx base_address = gen_rtx_MEM (TImode, pointer);
/* Walk all hard registers; only SSE registers that were saved for this
   function are restored here (16-byte TImode loads).  */
8379 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8380 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8382 /* Ensure that adjust_address won't be forced to produce pointer
8383 out of range allowed by x86-64 instruction set. */
8384 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a signed 32-bit displacement: materialize
   POINTER + OFFSET in %r11 and address relative to that.  */
8388 r11 = gen_rtx_REG (DImode, R11_REG);
8389 emit_move_insn (r11, GEN_INT (offset));
8390 emit_insn (gen_adddi3 (r11, r11, pointer));
8391 base_address = gen_rtx_MEM (TImode, r11);
/* Slots were stored 16-byte aligned by the prologue, so mark the mem
   accordingly to allow aligned moves.  */
8394 mem = adjust_address (base_address, TImode, offset);
8395 set_mem_align (mem, 128);
8396 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8401 /* Restore function stack, frame, and registers. */
/* STYLE: 0 = sibcall epilogue, 1 = normal return, 2 = eh_return path --
   presumably; confirm against callers of ix86_expand_epilogue.  */
8404 ix86_expand_epilogue (int style)
8408 struct ix86_frame frame;
8409 HOST_WIDE_INT offset;
8411 ix86_finalize_stack_realign_flags ();
8413 /* When stack is realigned, SP must be valid. */
8414 sp_valid = (!frame_pointer_needed
8415 || current_function_sp_is_unchanging
8416 || stack_realign_fp);
8418 ix86_compute_frame_layout (&frame);
8420 /* See the comment about red zone and frame
8421 pointer usage in ix86_expand_prologue. */
8422 if (frame_pointer_needed && frame.red_zone_size)
8423 emit_insn (gen_memory_blockage ());
8425 /* Calculate start of saved registers relative to ebp. Special care
8426 must be taken for the normal return case of a function using
8427 eh_return: the eax and edx registers are marked as saved, but not
8428 restored along this path. */
8429 offset = frame.nregs;
8430 if (crtl->calls_eh_return && style != 2)
8432 offset *= -UNITS_PER_WORD;
8433 offset -= frame.nsseregs * 16 + frame.padding0;
8435 /* If we're only restoring one register and sp is not valid then
8436 using a move instruction to restore the register since it's
8437 less work than reloading sp and popping the register.
8439 The default code result in stack adjustment using add/lea instruction,
8440 while this code results in LEAVE instruction (or discrete equivalent),
8441 so it is profitable in some other cases as well. Especially when there
8442 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8443 and there is exactly one register to pop. This heuristic may need some
8444 tuning in future. */
8445 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8446 || (TARGET_EPILOGUE_USING_MOVE
8447 && cfun->machine->use_fast_prologue_epilogue
8448 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8449 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8450 || (frame_pointer_needed && TARGET_USE_LEAVE
8451 && cfun->machine->use_fast_prologue_epilogue
8452 && (frame.nregs + frame.nsseregs) == 1)
8453 || crtl->calls_eh_return)
8455 /* Restore registers. We can use ebp or esp to address the memory
8456 locations. If both are available, default to ebp, since offsets
8457 are known to be small. Only exception is esp pointing directly
8458 to the end of block of saved registers, where we may simplify
8461 If we are realigning stack with bp and sp, regs restore can't
8462 be addressed by bp. sp must be used instead. */
8464 if (!frame_pointer_needed
8465 || (sp_valid && !frame.to_allocate)
8466 || stack_realign_fp)
8468 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8469 frame.to_allocate, style == 2);
8470 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8472 + frame.nsseregs * 16
8473 + frame.padding0, style == 2);
8477 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8478 offset, style == 2);
8479 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8481 + frame.nsseregs * 16
8482 + frame.padding0, style == 2);
8485 /* eh_return epilogues need %ecx added to the stack pointer. */
8488 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8490 /* Stack align doesn't work with eh_return. */
8491 gcc_assert (!crtl->stack_realign_needed);
8493 if (frame_pointer_needed)
8495 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8496 tmp = plus_constant (tmp, UNITS_PER_WORD);
8497 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8499 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8500 emit_move_insn (hard_frame_pointer_rtx, tmp);
8502 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8507 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8508 tmp = plus_constant (tmp, (frame.to_allocate
8509 + frame.nregs * UNITS_PER_WORD
8510 + frame.nsseregs * 16
8512 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8515 else if (!frame_pointer_needed)
8516 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8517 GEN_INT (frame.to_allocate
8518 + frame.nregs * UNITS_PER_WORD
8519 + frame.nsseregs * 16
8522 /* If not an i386, mov & pop is faster than "leave". */
8523 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8524 || !cfun->machine->use_fast_prologue_epilogue)
8525 emit_insn ((*ix86_gen_leave) ());
8528 pro_epilogue_adjust_stack (stack_pointer_rtx,
8529 hard_frame_pointer_rtx,
8532 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based restore path (registers restored with POP insns).  */
8537 /* First step is to deallocate the stack frame so that we can
8540 If we realign stack with frame pointer, then stack pointer
8541 won't be able to recover via lea $offset(%bp), %sp, because
8542 there is a padding area between bp and sp for realign.
8543 "add $to_allocate, %sp" must be used instead. */
8546 gcc_assert (frame_pointer_needed);
8547 gcc_assert (!stack_realign_fp);
8548 pro_epilogue_adjust_stack (stack_pointer_rtx,
8549 hard_frame_pointer_rtx,
8550 GEN_INT (offset), style);
8551 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8552 frame.to_allocate, style == 2);
8553 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8554 GEN_INT (frame.nsseregs * 16), style);
8556 else if (frame.to_allocate || frame.nsseregs)
8558 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8561 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8562 GEN_INT (frame.to_allocate
8563 + frame.nsseregs * 16
8564 + frame.padding0), style);
8567 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8568 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8569 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8570 if (frame_pointer_needed)
8572 /* Leave results in shorter dependency chains on CPUs that are
8573 able to grok it fast. */
8574 if (TARGET_USE_LEAVE)
8575 emit_insn ((*ix86_gen_leave) ());
8578 /* For stack realigned really happens, recover stack
8579 pointer to hard frame pointer is a must, if not using
8581 if (stack_realign_fp)
8582 pro_epilogue_adjust_stack (stack_pointer_rtx,
8583 hard_frame_pointer_rtx,
8585 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: recover SP from the DRAP register and pop
   the saved parameter pointer if it was callee-saved.  */
8590 if (crtl->drap_reg && crtl->stack_realign_needed)
8592 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8593 ? 0 : UNITS_PER_WORD);
8594 gcc_assert (stack_realign_drap);
8595 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8597 GEN_INT (-(UNITS_PER_WORD
8598 + param_ptr_offset))));
8599 if (!call_used_regs[REGNO (crtl->drap_reg)])
8600 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8604 /* Sibcall epilogues don't want a return instruction. */
8608 if (crtl->args.pops_args && crtl->args.size)
8610 rtx popc = GEN_INT (crtl->args.pops_args);
8612 /* i386 can only pop 64K bytes. If asked to pop more, pop
8613 return address, do explicit add, and jump indirectly to the
8616 if (crtl->args.pops_args >= 65536)
8618 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8620 /* There is no "pascal" calling convention in any 64bit ABI. */
8621 gcc_assert (!TARGET_64BIT);
8623 emit_insn (gen_popsi1 (ecx));
8624 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8625 emit_jump_insn (gen_return_indirect_internal (ecx));
8628 emit_jump_insn (gen_return_pop_internal (popc));
8631 emit_jump_insn (gen_return_internal ());
8634 /* Reset from the function's potential modifications. */
8637 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8638 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any alternate-PIC-register renaming done by the prologue so the
   next function starts from the canonical PIC register.  */
8640 if (pic_offset_table_rtx)
8641 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8643 /* Mach-O doesn't support labels at the end of objects, so if
8644 it looks like we might want one, insert a NOP. */
8646 rtx insn = get_last_insn ();
8649 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8650 insn = PREV_INSN (insn);
8654 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8655 fputs ("\tnop\n", file);
8661 /* Extract the parts of an RTL expression that is a valid memory address
8662 for an instruction. Return 0 if the structure of the address is
8663 grossly off. Return -1 if the address contains ASHIFT, so it is not
8664 strictly valid, but still used for computing length of lea instruction. */
8667 ix86_decompose_address (rtx addr, struct ix86_address *out)
8669 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8670 rtx base_reg, index_reg;
8671 HOST_WIDE_INT scale = 1;
8672 rtx scale_rtx = NULL_RTX;
8674 enum ix86_address_seg seg = SEG_DEFAULT;
/* Classify ADDR by its top-level code: plain register, sum of addends,
   scaled index, shift (lea only), or bare displacement.  */
8676 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8678 else if (GET_CODE (addr) == PLUS)
/* Flatten the (possibly nested) PLUS chain into an addends array,
   then assign each addend to base/index/disp/seg below.  */
8688 addends[n++] = XEXP (op, 1);
8691 while (GET_CODE (op) == PLUS);
8696 for (i = n; i >= 0; --i)
8699 switch (GET_CODE (op))
8704 index = XEXP (op, 0);
8705 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP denotes the thread pointer; with direct TLS segment
   references it maps to the %fs/%gs segment override.  */
8709 if (XINT (op, 1) == UNSPEC_TP
8710 && TARGET_TLS_DIRECT_SEG_REFS
8711 && seg == SEG_DEFAULT)
8712 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8741 else if (GET_CODE (addr) == MULT)
8743 index = XEXP (addr, 0); /* index*scale */
8744 scale_rtx = XEXP (addr, 1);
8746 else if (GET_CODE (addr) == ASHIFT)
8750 /* We're called for lea too, which implements ashift on occasion. */
8751 index = XEXP (addr, 0);
8752 tmp = XEXP (addr, 1);
8753 if (!CONST_INT_P (tmp))
8755 scale = INTVAL (tmp);
8756 if ((unsigned HOST_WIDE_INT) scale > 3)
/* Shift count becomes scale 2^count; counts above 3 cannot be
   encoded in an addressing mode.  */
8762 disp = addr; /* displacement */
8764 /* Extract the integral value of scale. */
8767 if (!CONST_INT_P (scale_rtx))
8769 scale = INTVAL (scale_rtx);
8772 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8773 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8775 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8776 if (base_reg && index_reg && scale == 1
8777 && (index_reg == arg_pointer_rtx
8778 || index_reg == frame_pointer_rtx
8779 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8782 tmp = base, base = index, index = tmp;
8783 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8786 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8787 if ((base_reg == hard_frame_pointer_rtx
8788 || base_reg == frame_pointer_rtx
8789 || base_reg == arg_pointer_rtx) && !disp)
8792 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8793 Avoid this by transforming to [%esi+0].
8794 Reload calls address legitimization without cfun defined, so we need
8795 to test cfun for being non-NULL. */
8796 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8797 && base_reg && !index_reg && !disp
8799 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8802 /* Special case: encode reg+reg instead of reg*2. */
8803 if (!base && index && scale && scale == 2)
8804 base = index, base_reg = index_reg, scale = 1;
8806 /* Special case: scaling cannot be encoded without base or displacement. */
8807 if (!base && !disp && index && scale != 1)
8819 /* Return cost of the memory address x.
8820 For i386, it is better to use a complex address than let gcc copy
8821 the address into a reg and make a new pseudo. But not if the address
8822 requires to two regs - that would mean more pseudos with longer
8825 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8827 struct ix86_address parts;
8829 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register-class tests below see hard/pseudo
   registers directly.  */
8833 if (parts.base && GET_CODE (parts.base) == SUBREG)
8834 parts.base = SUBREG_REG (parts.base);
8835 if (parts.index && GET_CODE (parts.index) == SUBREG)
8836 parts.index = SUBREG_REG (parts.index);
8838 /* Attempt to minimize number of registers in the address. */
8840 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8842 && (!REG_P (parts.index)
8843 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8847 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8849 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8850 && parts.base != parts.index)
8853 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8854 since its predecode logic can't detect the length of instructions
8855 and it degenerates to vector decoded. Increase cost of such
8856 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8857 to split such addresses or even refuse such addresses at all.
8859 Following addressing modes are affected:
8864 The first and last case may be avoidable by explicitly coding the zero in
8865 memory address, but I don't have AMD-K6 machine handy to check this
8869 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8870 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8871 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8877 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8878 this is used to form addresses to local data when -fPIC is in
8882 darwin_local_data_pic (rtx disp)
8884 return (GET_CODE (disp) == UNSPEC
8885 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8888 /* Determine if a given RTX is a valid constant. We already know this
8889 satisfies CONSTANT_P. */
8892 legitimate_constant_p (rtx x)
8894 switch (GET_CODE (x))
/* CONST wrapper: peel a possible PLUS-with-integer-offset, then
   validate the inner expression.  */
8899 if (GET_CODE (x) == PLUS)
8901 if (!CONST_INT_P (XEXP (x, 1)))
8906 if (TARGET_MACHO && darwin_local_data_pic (x))
8909 /* Only some unspecs are valid as "constants". */
8910 if (GET_CODE (x) == UNSPEC)
8911 switch (XINT (x, 1))
8916 return TARGET_64BIT;
8919 x = XVECEXP (x, 0, 0);
8920 return (GET_CODE (x) == SYMBOL_REF
8921 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8923 x = XVECEXP (x, 0, 0);
8924 return (GET_CODE (x) == SYMBOL_REF
8925 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8930 /* We must have drilled down to a symbol. */
8931 if (GET_CODE (x) == LABEL_REF)
8933 if (GET_CODE (x) != SYMBOL_REF)
8938 /* TLS symbols are never valid. */
8939 if (SYMBOL_REF_TLS_MODEL (x))
8942 /* DLLIMPORT symbols are never valid. */
8943 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8944 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode and vector constants: only the standard SSE constants
   (zero / all-ones) are representable inline.  */
8949 if (GET_MODE (x) == TImode
8950 && x != CONST0_RTX (TImode)
8956 if (!standard_sse_constant_p (x))
8963 /* Otherwise we handle everything else in the move patterns. */
8967 /* Determine if it's legal to put X into the constant pool. This
8968 is not possible for the address of thread-local symbols, which
8969 is checked above. */
8972 ix86_cannot_force_const_mem (rtx x)
8974 /* We can always put integral constants and vectors in memory. */
8975 switch (GET_CODE (x))
/* Anything else is poolable exactly when it is a legitimate constant.  */
8985 return !legitimate_constant_p (x);
8988 /* Determine if a given RTX is a valid constant address. */
/* Valid iff X is constant and passes strict address legitimacy.  */
8991 constant_address_p (rtx x)
8993 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8996 /* Nonzero if the constant value X is a legitimate general operand
8997 when generating PIC code. It is given that flag_pic is on and
8998 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9001 legitimate_pic_operand_p (rtx x)
9005 switch (GET_CODE (x))
/* CONST wrapper: strip an optional integer offset first, then check
   the inner symbol/unspec.  */
9008 inner = XEXP (x, 0);
9009 if (GET_CODE (inner) == PLUS
9010 && CONST_INT_P (XEXP (inner, 1)))
9011 inner = XEXP (inner, 0);
9013 /* Only some unspecs are valid as "constants". */
9014 if (GET_CODE (inner) == UNSPEC)
9015 switch (XINT (inner, 1))
9020 return TARGET_64BIT;
9022 x = XVECEXP (inner, 0, 0);
9023 return (GET_CODE (x) == SYMBOL_REF
9024 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9025 case UNSPEC_MACHOPIC_OFFSET:
9026 return legitimate_pic_address_disp_p (x);
/* Bare SYMBOL_REF / LABEL_REF fall through to the displacement check.  */
9034 return legitimate_pic_address_disp_p (x);
9041 /* Determine if a given CONST RTX is a valid memory displacement
9045 legitimate_pic_address_disp_p (rtx disp)
9049 /* In 64bit mode we can allow direct addresses of symbols and labels
9050 when they are not dynamic symbols. */
9053 rtx op0 = disp, op1;
9055 switch (GET_CODE (disp))
/* CONST (PLUS symbol offset): the offset must fit in +/-16MB so that
   RIP-relative addressing with added displacement stays in range.  */
9061 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9063 op0 = XEXP (XEXP (disp, 0), 0);
9064 op1 = XEXP (XEXP (disp, 0), 1);
9065 if (!CONST_INT_P (op1)
9066 || INTVAL (op1) >= 16*1024*1024
9067 || INTVAL (op1) < -16*1024*1024)
9069 if (GET_CODE (op0) == LABEL_REF)
9071 if (GET_CODE (op0) != SYMBOL_REF)
9076 /* TLS references should always be enclosed in UNSPEC. */
9077 if (SYMBOL_REF_TLS_MODEL (op0))
9079 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9080 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit path failed: fall through to the generic UNSPEC checks.  */
9088 if (GET_CODE (disp) != CONST)
9090 disp = XEXP (disp, 0);
/* 64-bit mode only admits the PC-relative GOT unspecs below.  */
9094 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9095 of GOT tables. We should not need these anyway. */
9096 if (GET_CODE (disp) != UNSPEC
9097 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9098 && XINT (disp, 1) != UNSPEC_GOTOFF
9099 && XINT (disp, 1) != UNSPEC_PLTOFF))
9102 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9103 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit mode: strip optional integer offset, then classify unspec.  */
9109 if (GET_CODE (disp) == PLUS)
9111 if (!CONST_INT_P (XEXP (disp, 1)))
9113 disp = XEXP (disp, 0);
9117 if (TARGET_MACHO && darwin_local_data_pic (disp))
9120 if (GET_CODE (disp) != UNSPEC)
9123 switch (XINT (disp, 1))
9128 /* We need to check for both symbols and labels because VxWorks loads
9129 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9131 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9132 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9134 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9135 While ABI specify also 32bit relocation but we don't produce it in
9136 small PIC model at all. */
9137 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9138 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9140 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9142 case UNSPEC_GOTTPOFF:
9143 case UNSPEC_GOTNTPOFF:
9144 case UNSPEC_INDNTPOFF:
/* TLS unspecs: the wrapped symbol's TLS model must match the
   relocation kind.  */
9147 disp = XVECEXP (disp, 0, 0);
9148 return (GET_CODE (disp) == SYMBOL_REF
9149 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9151 disp = XVECEXP (disp, 0, 0);
9152 return (GET_CODE (disp) == SYMBOL_REF
9153 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9155 disp = XVECEXP (disp, 0, 0);
9156 return (GET_CODE (disp) == SYMBOL_REF
9157 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9163 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9164 memory address for an instruction. The MODE argument is the machine mode
9165 for the MEM expression that wants to use this address.
9167 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9168 convert common non-canonical forms to canonical form so that they will
9172 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9173 rtx addr, int strict)
9175 struct ix86_address parts;
9176 rtx base, index, disp;
9177 HOST_WIDE_INT scale;
9178 const char *reason = NULL;
9179 rtx reason_rtx = NULL_RTX;
9181 if (ix86_decompose_address (addr, &parts) <= 0)
9183 reason = "decomposition failed";
9188 index = parts.index;
9190 scale = parts.scale;
9192 /* Validate base register.
9194 Don't allow SUBREG's that span more than a word here. It can lead to spill
9195 failures when the base is one word out of a two word structure, which is
9196 represented internally as a DImode int. */
9205 else if (GET_CODE (base) == SUBREG
9206 && REG_P (SUBREG_REG (base))
9207 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9209 reg = SUBREG_REG (base);
9212 reason = "base is not a register";
9216 if (GET_MODE (base) != Pmode)
9218 reason = "base is not in Pmode";
/* Strict checking (post-reload) demands hard registers valid for
   base; non-strict also accepts pseudos.  */
9222 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9223 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9225 reason = "base is not valid";
9230 /* Validate index register.
9232 Don't allow SUBREG's that span more than a word here -- same as above. */
9241 else if (GET_CODE (index) == SUBREG
9242 && REG_P (SUBREG_REG (index))
9243 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9245 reg = SUBREG_REG (index);
9248 reason = "index is not a register";
9252 if (GET_MODE (index) != Pmode)
9254 reason = "index is not in Pmode";
9258 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9259 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9261 reason = "index is not valid";
9266 /* Validate scale factor. */
9269 reason_rtx = GEN_INT (scale);
9272 reason = "scale without index";
/* Hardware only encodes scales 1, 2, 4, 8.  */
9276 if (scale != 2 && scale != 4 && scale != 8)
9278 reason = "scale is not a valid multiplier";
9283 /* Validate displacement. */
9288 if (GET_CODE (disp) == CONST
9289 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9290 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9291 switch (XINT (XEXP (disp, 0), 1))
9293 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9294 used. While ABI specify also 32bit relocations, we don't produce
9295 them at all and use IP relative instead. */
9298 gcc_assert (flag_pic);
9300 goto is_legitimate_pic;
9301 reason = "64bit address unspec";
9304 case UNSPEC_GOTPCREL:
9305 gcc_assert (flag_pic);
9306 goto is_legitimate_pic;
9308 case UNSPEC_GOTTPOFF:
9309 case UNSPEC_GOTNTPOFF:
9310 case UNSPEC_INDNTPOFF:
9316 reason = "invalid address unspec";
9320 else if (SYMBOLIC_CONST (disp)
9324 && MACHOPIC_INDIRECT
9325 && !machopic_operand_p (disp)
/* PIC displacement validation.  */
9331 if (TARGET_64BIT && (index || base))
9333 /* foo@dtpoff(%rX) is ok. */
9334 if (GET_CODE (disp) != CONST
9335 || GET_CODE (XEXP (disp, 0)) != PLUS
9336 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9337 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9338 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9339 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9341 reason = "non-constant pic memory reference";
9345 else if (! legitimate_pic_address_disp_p (disp))
9347 reason = "displacement is an invalid pic construct";
9351 /* This code used to verify that a symbolic pic displacement
9352 includes the pic_offset_table_rtx register.
9354 While this is good idea, unfortunately these constructs may
9355 be created by "adds using lea" optimization for incorrect
9364 This code is nonsensical, but results in addressing
9365 GOT table with pic_offset_table_rtx base. We can't
9366 just refuse it easily, since it gets matched by
9367 "addsi3" pattern, that later gets split to lea in the
9368 case output register differs from input. While this
9369 can be handled by separate addsi pattern for this case
9370 that never results in lea, this seems to be easier and
9371 correct fix for crash to disable this test. */
9373 else if (GET_CODE (disp) != LABEL_REF
9374 && !CONST_INT_P (disp)
9375 && (GET_CODE (disp) != CONST
9376 || !legitimate_constant_p (disp))
9377 && (GET_CODE (disp) != SYMBOL_REF
9378 || !legitimate_constant_p (disp)))
9380 reason = "displacement is not constant";
9383 else if (TARGET_64BIT
9384 && !x86_64_immediate_operand (disp, VOIDmode))
9386 reason = "displacement is out of range";
9391 /* Everything looks valid. */
9398 /* Return a unique alias set for the GOT.  */
/* NOTE(review): excerpt is elided -- the opening brace, the
   `if (set == -1)' guard and the `return set;' are not visible here.
   The visible intent is a lazily-allocated, cached alias set shared by
   all GOT memory references, so GOT loads don't alias user data.  */
9400 static alias_set_type
9401 ix86_GOT_alias_set (void)
9403 static alias_set_type set = -1;
     /* Allocate once on first call (guard elided in this excerpt).  */
9405 set = new_alias_set ();
9409 /* Return a legitimate reference for ORIG (an address) using the
9410 register REG. If REG is 0, a new pseudo is generated.
9412 There are two types of references that must be handled:
9414 1. Global data references must load the address from the GOT, via
9415 the PIC reg. An insn is emitted to do this load, and the reg is
9418 2. Static data references, constant pool addresses, and code labels
9419 compute the address as an offset from the GOT, whose base is in
9420 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9421 differentiate them from global data objects. The returned
9422 address is the PIC reg + an unspec constant.
9424 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9425 reg also appears in the address. */
/* NOTE(review): this excerpt is elided -- locals `addr'/`new_rtx',
   several braces and the final return are not visible.  Comments below
   describe only what the visible lines establish.  */
9428 legitimize_pic_address (rtx orig, rtx reg)
     /* Darwin 32-bit: delegate entirely to the shared Mach-O PIC code.  */
9435 if (TARGET_MACHO && !TARGET_64BIT)
9438 reg = gen_reg_rtx (Pmode);
9439 /* Use the generic Mach-O PIC machinery. */
9440 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9444 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
     /* 64-bit, non-small code model: build pic_reg + @GOTOFF(sym [+ off]).  */
9446 else if (TARGET_64BIT
9447 && ix86_cmodel != CM_SMALL_PIC
9448 && gotoff_operand (addr, Pmode))
9451 /* This symbol may be referenced via a displacement from the PIC
9452 base address (@GOTOFF). */
9454 if (reload_in_progress)
9455 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9456 if (GET_CODE (addr) == CONST)
9457 addr = XEXP (addr, 0);
9458 if (GET_CODE (addr) == PLUS)
9460 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9462 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9465 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9466 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9468 tmpreg = gen_reg_rtx (Pmode);
9471 emit_move_insn (tmpreg, new_rtx);
     /* When a destination REG is available, add the PIC base in via
	expand_simple_binop; otherwise leave a PLUS for the caller.  */
9475 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9476 tmpreg, 1, OPTAB_DIRECT);
9479 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
     /* 32-bit @GOTOFF case: same construction, but the PIC base is
	added symbolically rather than through a binop.  */
9481 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9483 /* This symbol may be referenced via a displacement from the PIC
9484 base address (@GOTOFF). */
9486 if (reload_in_progress)
9487 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9488 if (GET_CODE (addr) == CONST)
9489 addr = XEXP (addr, 0);
9490 if (GET_CODE (addr) == PLUS)
9492 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9494 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9497 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9498 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9499 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9503 emit_move_insn (reg, new_rtx);
     /* Global (non-TLS) symbols: address must be loaded from the GOT.  */
9507 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9508 /* We can't use @GOTOFF for text labels on VxWorks;
9509 see gotoff_operand. */
9510 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
     /* dllimport symbols resolve through the __imp_ indirection cell
	instead of the GOT.  */
9512 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9514 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9515 return legitimize_dllimport_symbol (addr, true);
9516 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9517 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9518 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9520 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9521 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
     /* 64-bit small/medium models: RIP-relative @GOTPCREL load.  */
9525 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9527 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9528 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9529 new_rtx = gen_const_mem (Pmode, new_rtx);
9530 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9533 reg = gen_reg_rtx (Pmode);
9534 /* Use directly gen_movsi, otherwise the address is loaded
9535 into register for CSE. We don't want to CSE this addresses,
9536 instead we CSE addresses from the GOT table, so skip this. */
9537 emit_insn (gen_movsi (reg, new_rtx));
9542 /* This symbol must be referenced via a load from the
9543 Global Offset Table (@GOT). */
9545 if (reload_in_progress)
9546 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9547 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9548 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9550 new_rtx = force_reg (Pmode, new_rtx);
9551 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9552 new_rtx = gen_const_mem (Pmode, new_rtx);
9553 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9556 reg = gen_reg_rtx (Pmode);
9557 emit_move_insn (reg, new_rtx);
     /* Fallback: plain constants and composite CONST/PLUS expressions.  */
9563 if (CONST_INT_P (addr)
9564 && !x86_64_immediate_operand (addr, VOIDmode))
9568 emit_move_insn (reg, addr);
9572 new_rtx = force_reg (Pmode, addr);
9574 else if (GET_CODE (addr) == CONST)
9576 addr = XEXP (addr, 0);
9578 /* We must match stuff we generate before. Assume the only
9579 unspecs that can get here are ours. Not that we could do
9580 anything with them anyway.... */
9581 if (GET_CODE (addr) == UNSPEC
9582 || (GET_CODE (addr) == PLUS
9583 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9585 gcc_assert (GET_CODE (addr) == PLUS);
9587 if (GET_CODE (addr) == PLUS)
9589 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9591 /* Check first to see if this is a constant offset from a @GOTOFF
9592 symbol reference. */
9593 if (gotoff_operand (op0, Pmode)
9594 && CONST_INT_P (op1))
9598 if (reload_in_progress)
9599 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9600 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9602 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9603 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9604 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9608 emit_move_insn (reg, new_rtx);
     /* Offsets outside +/-16MB (and non-immediates on 64-bit) must be
	materialized in registers rather than folded into the address.  */
9614 if (INTVAL (op1) < -16*1024*1024
9615 || INTVAL (op1) >= 16*1024*1024)
9617 if (!x86_64_immediate_operand (op1, Pmode))
9618 op1 = force_reg (Pmode, op1);
9619 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
     /* General PLUS: legitimize both halves recursively and re-associate
	any constant term to the outermost position.  */
9625 base = legitimize_pic_address (XEXP (addr, 0), reg);
9626 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9627 base == reg ? NULL_RTX : reg);
9629 if (CONST_INT_P (new_rtx))
9630 new_rtx = plus_constant (base, INTVAL (new_rtx));
9633 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9635 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9636 new_rtx = XEXP (new_rtx, 1);
9638 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9646 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): elided excerpt -- return type, the `if (!to_reg)' path
   and the final return are not visible.  Visible lines build the
   UNSPEC_TP rtx and, when a register is required, emit a SET into a
   fresh pseudo.  */
9649 get_thread_pointer (int to_reg)
9653 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9657 reg = gen_reg_rtx (Pmode);
9658 insn = gen_rtx_SET (VOIDmode, reg, tp);
9659 insn = emit_insn (insn);
9664 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9665 false if we expect this to be used for a memory address and true if
9666 we expect to load the address into a register. */
/* NOTE(review): elided excerpt -- the `switch (model)', braces, `break's
   and final returns are partly missing.  One `case' per TLS access
   model; comments below stick to what the visible lines show.  */
9669 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9671 rtx dest, base, off, pic, tp;
     /* General dynamic: call __tls_get_addr (via the tls_global_dynamic
	patterns) to obtain the symbol's address at runtime.  */
9676 case TLS_MODEL_GLOBAL_DYNAMIC:
9677 dest = gen_reg_rtx (Pmode);
9678 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9680 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9682 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9685 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9686 insns = get_insns ();
     /* Mark the call const so the sequence can be CSEd as a libcall
	computing X.  */
9689 RTL_CONST_CALL_P (insns) = 1;
9690 emit_libcall_block (insns, dest, rax, x);
9692 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9693 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9695 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9697 if (TARGET_GNU2_TLS)
9699 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9701 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
     /* Local dynamic: one base call for the module, then per-symbol
	@DTPOFF displacements off that base.  */
9705 case TLS_MODEL_LOCAL_DYNAMIC:
9706 base = gen_reg_rtx (Pmode);
9707 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9709 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9711 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9714 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9715 insns = get_insns ();
9718 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9719 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9720 RTL_CONST_CALL_P (insns) = 1;
9721 emit_libcall_block (insns, base, rax, note);
9723 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9724 emit_insn (gen_tls_local_dynamic_base_64 (base));
9726 emit_insn (gen_tls_local_dynamic_base_32 (base));
9728 if (TARGET_GNU2_TLS)
9730 rtx x = ix86_tls_module_base ();
9732 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9733 gen_rtx_MINUS (Pmode, x, tp));
9736 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9737 off = gen_rtx_CONST (Pmode, off);
9739 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9741 if (TARGET_GNU2_TLS)
9743 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9745 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
     /* Initial exec: load the TP offset from the GOT, pick the unspec
	flavor by target/PIC configuration.  */
9750 case TLS_MODEL_INITIAL_EXEC:
9754 type = UNSPEC_GOTNTPOFF;
9758 if (reload_in_progress)
9759 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9760 pic = pic_offset_table_rtx;
9761 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9763 else if (!TARGET_ANY_GNU_TLS)
9765 pic = gen_reg_rtx (Pmode);
9766 emit_insn (gen_set_got (pic));
9767 type = UNSPEC_GOTTPOFF;
9772 type = UNSPEC_INDNTPOFF;
9775 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9776 off = gen_rtx_CONST (Pmode, off);
9778 off = gen_rtx_PLUS (Pmode, pic, off);
9779 off = gen_const_mem (Pmode, off);
9780 set_mem_alias_set (off, ix86_GOT_alias_set ());
9782 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9784 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9785 off = force_reg (Pmode, off);
9786 return gen_rtx_PLUS (Pmode, base, off);
     /* Sun-style TLS: subtract the (negated) offset from the TP.  */
9790 base = get_thread_pointer (true);
9791 dest = gen_reg_rtx (Pmode);
9792 emit_insn (gen_subsi3 (dest, base, off));
     /* Local exec: offset is a link-time constant relative to the TP.  */
9796 case TLS_MODEL_LOCAL_EXEC:
9797 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9798 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9799 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9800 off = gen_rtx_CONST (Pmode, off);
9802 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9804 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9805 return gen_rtx_PLUS (Pmode, base, off);
9809 base = get_thread_pointer (true);
9810 dest = gen_reg_rtx (Pmode);
9811 emit_insn (gen_subsi3 (dest, base, off));
9822 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* NOTE(review): elided excerpt -- local declarations, `if (!dllimport_map)',
   `if (h)' / early-return and the trailing `return to;' are not visible.  */
9825 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9826 htab_t dllimport_map;
9829 get_dllimport_decl (tree decl)
9831 struct tree_map *h, in;
9835 size_t namelen, prefixlen;
     /* Lazily create the GC-tracked decl -> __imp_ decl cache.  */
9841 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9843 in.hash = htab_hash_pointer (decl);
9844 in.base.from = decl;
9845 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9846 h = (struct tree_map *) *loc;
     /* Cache miss: build a fresh artificial VAR_DECL holding the
	indirection pointer and record it in the map.  */
9850 *loc = h = GGC_NEW (struct tree_map);
9852 h->base.from = decl;
9853 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9854 DECL_ARTIFICIAL (to) = 1;
9855 DECL_IGNORED_P (to) = 1;
9856 DECL_EXTERNAL (to) = 1;
9857 TREE_READONLY (to) = 1;
9859 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9860 name = targetm.strip_name_encoding (name);
     /* fastcall names and targets with no user label prefix get
	"__imp_"; otherwise the extra leading underscore is kept.  */
9861 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9862 ? "*__imp_" : "*__imp__";
9863 namelen = strlen (name);
9864 prefixlen = strlen (prefix);
9865 imp_name = (char *) alloca (namelen + prefixlen + 1);
9866 memcpy (imp_name, prefix, prefixlen);
9867 memcpy (imp_name + prefixlen, name, namelen + 1);
9869 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9870 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9871 SET_SYMBOL_REF_DECL (rtl, to);
9872 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
     /* The decl's RTL is a load through the __imp_ cell; share the GOT
	alias set since it behaves like an import table entry.  */
9874 rtl = gen_const_mem (Pmode, rtl);
9875 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9877 SET_DECL_RTL (to, rtl);
9878 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9883 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9884 true if we require the result be a register. */
/* NOTE(review): elided excerpt -- return type, local declarations and
   the final `return x;' are not visible.  */
9887 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9892 gcc_assert (SYMBOL_REF_DECL (symbol));
9893 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9895 x = DECL_RTL (imp_decl);
     /* Caller needs a register, not a MEM through the __imp_ cell.  */
9897 x = force_reg (Pmode, x);
9901 /* Try machine-dependent ways of modifying an illegitimate address
9902 to be legitimate. If we find one, return the new, valid address.
9903 This macro is used in only one place: `memory_address' in explow.c.
9905 OLDX is the address as it was before break_out_memory_refs was called.
9906 In some cases it is useful to look at this to decide what needs to be done.
9908 MODE and WIN are passed so that this macro can use
9909 GO_IF_LEGITIMATE_ADDRESS.
9911 It is always safe for this macro to do nothing. It exists to recognize
9912 opportunities to optimize the output.
9914 For the 80386, we handle X+REG by loading X into a register R and
9915 using R+REG. R will go in a general reg and indexing will be used.
9916 However, if REG is a broken-out memory address or multiplication,
9917 nothing needs to be done because REG can certainly go in a general reg.
9919 When -fpic is used, special handling is needed for symbolic references.
9920 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided excerpt -- return type, `changed' declaration,
   several braces/returns are missing; comments below only annotate the
   visible transformations.  */
9923 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
     /* TLS symbols (bare or inside CONST+PLUS) go through the dedicated
	TLS legitimizer.  */
9928 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9930 return legitimize_tls_address (x, (enum tls_model) log, false);
9931 if (GET_CODE (x) == CONST
9932 && GET_CODE (XEXP (x, 0)) == PLUS
9933 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9934 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9936 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9937 (enum tls_model) log, false);
9938 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
     /* dllimport symbols likewise get their __imp_ indirection.  */
9941 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9943 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9944 return legitimize_dllimport_symbol (x, true);
9945 if (GET_CODE (x) == CONST
9946 && GET_CODE (XEXP (x, 0)) == PLUS
9947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9948 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9950 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9951 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9955 if (flag_pic && SYMBOLIC_CONST (x))
9956 return legitimize_pic_address (x, 0);
9958 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9959 if (GET_CODE (x) == ASHIFT
9960 && CONST_INT_P (XEXP (x, 1))
9961 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9964 log = INTVAL (XEXP (x, 1));
9965 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9966 GEN_INT (1 << log));
9969 if (GET_CODE (x) == PLUS)
9971 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9973 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9974 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9975 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9978 log = INTVAL (XEXP (XEXP (x, 0), 1));
9979 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9980 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9981 GEN_INT (1 << log));
9984 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9985 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9986 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9989 log = INTVAL (XEXP (XEXP (x, 1), 1));
9990 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9991 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9992 GEN_INT (1 << log));
9995 /* Put multiply first if it isn't already. */
9996 if (GET_CODE (XEXP (x, 1)) == MULT)
9998 rtx tmp = XEXP (x, 0);
9999 XEXP (x, 0) = XEXP (x, 1);
10004 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10005 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10006 created by virtual register instantiation, register elimination, and
10007 similar optimizations. */
10008 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10011 x = gen_rtx_PLUS (Pmode,
10012 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10013 XEXP (XEXP (x, 1), 0)),
10014 XEXP (XEXP (x, 1), 1));
10018 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10019 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10020 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10021 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10022 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10023 && CONSTANT_P (XEXP (x, 1)))
10026 rtx other = NULL_RTX;
10028 if (CONST_INT_P (XEXP (x, 1)))
10030 constant = XEXP (x, 1);
10031 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10033 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10035 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10036 other = XEXP (x, 1);
10044 x = gen_rtx_PLUS (Pmode,
10045 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10046 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10047 plus_constant (other, INTVAL (constant)));
     /* After each rewrite, stop early if the address is now valid.  */
10051 if (changed && legitimate_address_p (mode, x, FALSE))
10054 if (GET_CODE (XEXP (x, 0)) == MULT)
10057 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10060 if (GET_CODE (XEXP (x, 1)) == MULT)
10063 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10067 && REG_P (XEXP (x, 1))
10068 && REG_P (XEXP (x, 0)))
10071 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10074 x = legitimize_pic_address (x, 0);
10077 if (changed && legitimate_address_p (mode, x, FALSE))
     /* Last resort: force the non-register half into a fresh pseudo so
	the result is reg+reg.  */
10080 if (REG_P (XEXP (x, 0)))
10082 rtx temp = gen_reg_rtx (Pmode);
10083 rtx val = force_operand (XEXP (x, 1), temp);
10085 emit_move_insn (temp, val);
10087 XEXP (x, 1) = temp;
10091 else if (REG_P (XEXP (x, 1)))
10093 rtx temp = gen_reg_rtx (Pmode);
10094 rtx val = force_operand (XEXP (x, 0), temp);
10096 emit_move_insn (temp, val);
10098 XEXP (x, 0) = temp;
10106 /* Print an integer constant expression in assembler syntax. Addition
10107 and subtraction are the only arithmetic that may appear in these
10108 expressions. FILE is the stdio stream to write to, X is the rtx, and
10109 CODE is the operand print code from the output string. */
/* NOTE(review): elided excerpt -- return type, several `case' labels,
   `break's and braces are missing from this view.  */
10112 output_pic_addr_const (FILE *file, rtx x, int code)
10116 switch (GET_CODE (x))
10119 gcc_assert (flag_pic);
10124 if (! TARGET_MACHO || TARGET_64BIT)
10125 output_addr_const (file, x);
10128 const char *name = XSTR (x, 0);
10130 /* Mark the decl as referenced so that cgraph will
10131 output the function. */
10132 if (SYMBOL_REF_DECL (x))
10133 mark_decl_referenced (SYMBOL_REF_DECL (x));
10136 if (MACHOPIC_INDIRECT
10137 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10138 name = machopic_indirection_name (x, /*stub_p=*/true);
10140 assemble_name (file, name);
     /* 'P' print code on a non-local symbol: route the call through
	the PLT.  */
10142 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10143 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10144 fputs ("@PLT", file);
10151 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10152 assemble_name (asm_out_file, buf);
10156 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10160 /* This used to output parentheses around the expression,
10161 but that does not work on the 386 (either ATT or BSD assembler). */
10162 output_pic_addr_const (file, XEXP (x, 0), code);
10166 if (GET_MODE (x) == VOIDmode)
10168 /* We can use %d if the number is <32 bits and positive. */
10169 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10170 fprintf (file, "0x%lx%08lx",
10171 (unsigned long) CONST_DOUBLE_HIGH (x),
10172 (unsigned long) CONST_DOUBLE_LOW (x));
10174 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10177 /* We can't handle floating point constants;
10178 PRINT_OPERAND must handle them. */
10179 output_operand_lossage ("floating constant misused");
10183 /* Some assemblers need integer constants to appear first. */
10184 if (CONST_INT_P (XEXP (x, 0)))
10186 output_pic_addr_const (file, XEXP (x, 0), code);
10188 output_pic_addr_const (file, XEXP (x, 1), code);
     /* MINUS: constant operand must be second; assert and print a-b.  */
10192 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10193 output_pic_addr_const (file, XEXP (x, 1), code);
10195 output_pic_addr_const (file, XEXP (x, 0), code);
10201 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10202 output_pic_addr_const (file, XEXP (x, 0), code);
10204 output_pic_addr_const (file, XEXP (x, 1), code);
10206 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
     /* UNSPEC: print the wrapped operand, then the relocation suffix
	chosen by the unspec number.  */
10210 gcc_assert (XVECLEN (x, 0) == 1);
10211 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10212 switch (XINT (x, 1))
10215 fputs ("@GOT", file);
10217 case UNSPEC_GOTOFF:
10218 fputs ("@GOTOFF", file);
10220 case UNSPEC_PLTOFF:
10221 fputs ("@PLTOFF", file);
10223 case UNSPEC_GOTPCREL:
10224 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10225 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10227 case UNSPEC_GOTTPOFF:
10228 /* FIXME: This might be @TPOFF in Sun ld too. */
10229 fputs ("@GOTTPOFF", file);
10232 fputs ("@TPOFF", file);
10234 case UNSPEC_NTPOFF:
10236 fputs ("@TPOFF", file);
10238 fputs ("@NTPOFF", file);
10240 case UNSPEC_DTPOFF:
10241 fputs ("@DTPOFF", file);
10243 case UNSPEC_GOTNTPOFF:
10245 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10246 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10248 fputs ("@GOTNTPOFF", file);
10250 case UNSPEC_INDNTPOFF:
10251 fputs ("@INDNTPOFF", file);
10254 case UNSPEC_MACHOPIC_OFFSET:
10256 machopic_output_function_base_name (file);
10260 output_operand_lossage ("invalid UNSPEC as operand");
10266 output_operand_lossage ("invalid expression as operand");
10270 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10271 We need to emit DTP-relative relocations. */
/* NOTE(review): elided excerpt -- the `switch (size)' and some case
   labels are not visible; visible lines emit the directive, the
   symbol with an @DTPOFF relocation, and pad to 8 bytes for size 8.  */
10273 static void ATTRIBUTE_UNUSED
10274 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10276 fputs (ASM_LONG, file);
10277 output_addr_const (file, x);
10278 fputs ("@DTPOFF", file);
     /* size == 8: emit the upper half as a literal zero word.  */
10284 fputs (", 0", file);
10287 gcc_unreachable ();
10291 /* Return true if X is a representation of the PIC register. This copes
10292 with calls from ix86_find_base_term, where the register might have
10293 been replaced by a cselib value. */
10296 ix86_pic_register_p (rtx x)
     /* cselib VALUE: compare against the PIC register via cselib
	equivalence rather than REGNO.  */
10298 if (GET_CODE (x) == VALUE)
10299 return (pic_offset_table_rtx
10300 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10302 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10305 /* In the name of slightly smaller debug output, and to cater to
10306 general assembler lossage, recognize PIC+GOTOFF and turn it back
10307 into a direct symbol reference.
10309 On Darwin, this is necessary to avoid a crash, because Darwin
10310 has a different PIC label for each routine but the DWARF debugging
10311 information is not associated with any particular routine, so it's
10312 necessary to remove references to the PIC label from RTL stored by
10313 the DWARF output code. */
/* NOTE(review): elided excerpt -- return type, the initial `x = ...'
   setup and several early-return lines are missing from this view.  */
10316 ix86_delegitimize_address (rtx orig_x)
10319 /* reg_addend is NULL or a multiple of some register. */
10320 rtx reg_addend = NULL_RTX;
10321 /* const_addend is NULL or a const_int. */
10322 rtx const_addend = NULL_RTX;
10323 /* This is the result, or NULL. */
10324 rtx result = NULL_RTX;
     /* 64-bit @GOTPCREL load: strip the UNSPEC and return the symbol.  */
10331 if (GET_CODE (x) != CONST
10332 || GET_CODE (XEXP (x, 0)) != UNSPEC
10333 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10334 || !MEM_P (orig_x))
10336 return XVECEXP (XEXP (x, 0), 0, 0);
10339 if (GET_CODE (x) != PLUS
10340 || GET_CODE (XEXP (x, 1)) != CONST)
10343 if (ix86_pic_register_p (XEXP (x, 0)))
10344 /* %ebx + GOT/GOTOFF */
10346 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10348 /* %ebx + %reg * scale + GOT/GOTOFF */
10349 reg_addend = XEXP (x, 0);
10350 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10351 reg_addend = XEXP (reg_addend, 1);
10352 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10353 reg_addend = XEXP (reg_addend, 0);
10356 if (!REG_P (reg_addend)
10357 && GET_CODE (reg_addend) != MULT
10358 && GET_CODE (reg_addend) != ASHIFT)
     /* Peel off an optional CONST_INT displacement inside the CONST.  */
10364 x = XEXP (XEXP (x, 1), 0);
10365 if (GET_CODE (x) == PLUS
10366 && CONST_INT_P (XEXP (x, 1)))
10368 const_addend = XEXP (x, 1);
10372 if (GET_CODE (x) == UNSPEC
10373 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10374 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10375 result = XVECEXP (x, 0, 0);
10377 if (TARGET_MACHO && darwin_local_data_pic (x)
10378 && !MEM_P (orig_x))
10379 result = XVECEXP (x, 0, 0);
     /* Re-attach the stripped constant and register addends.  */
10385 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10387 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10391 /* If X is a machine specific address (i.e. a symbol or label being
10392 referenced as a displacement from the GOT implemented using an
10393 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): elided excerpt -- return type, the TARGET_64BIT guard
   and intermediate `return x;' lines are not visible here.  */
10396 ix86_find_base_term (rtx x)
10402 if (GET_CODE (x) != CONST)
10404 term = XEXP (x, 0);
     /* Skip over (plus term const) wrappers around the unspec.  */
10405 if (GET_CODE (term) == PLUS
10406 && (CONST_INT_P (XEXP (term, 1))
10407 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)
10408 term = XEXP (term, 0);
10409 if (GET_CODE (term) != UNSPEC
10410 || XINT (term, 1) != UNSPEC_GOTPCREL)
10413 return XVECEXP (term, 0, 0);
10416 return ix86_delegitimize_address (x);
/* Emit the assembler condition-code suffix for CODE under flags mode
   MODE.  REVERSE inverts the condition; FP selects fcmov-style spellings.
   NOTE(review): elided excerpt -- the `switch (code)' and most case
   labels are missing from this view, so the suffix assignments below
   appear without their cases.  */
10420 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10421 int fp, FILE *file)
10423 const char *suffix;
     /* FP compares: fold away bypass/second codes, then map to the
	equivalent integer condition.  */
10425 if (mode == CCFPmode || mode == CCFPUmode)
10427 enum rtx_code second_code, bypass_code;
10428 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10429 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10430 code = ix86_fp_compare_code_to_integer (code);
10434 code = reverse_condition (code);
10485 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10489 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10490 Those same assemblers have the same but opposite lossage on cmov. */
10491 if (mode == CCmode)
10492 suffix = fp ? "nbe" : "a";
10493 else if (mode == CCCmode)
10496 gcc_unreachable ();
10512 gcc_unreachable ();
10516 gcc_assert (mode == CCmode || mode == CCCmode);
10533 gcc_unreachable ();
10537 /* ??? As above. */
10538 gcc_assert (mode == CCmode || mode == CCCmode);
10539 suffix = fp ? "nb" : "ae";
10542 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10546 /* ??? As above. */
10547 if (mode == CCmode)
10549 else if (mode == CCCmode)
10550 suffix = fp ? "nb" : "ae";
10552 gcc_unreachable ();
10555 suffix = fp ? "u" : "p";
10558 suffix = fp ? "nu" : "np";
10561 gcc_unreachable ();
10563 fputs (suffix, file);
10566 /* Print the name of register X to FILE based on its machine mode and number.
10567 If CODE is 'w', pretend the mode is HImode.
10568 If CODE is 'b', pretend the mode is QImode.
10569 If CODE is 'k', pretend the mode is SImode.
10570 If CODE is 'q', pretend the mode is DImode.
10571 If CODE is 'x', pretend the mode is V4SFmode.
10572 If CODE is 't', pretend the mode is V8SFmode.
10573 If CODE is 'h', pretend the reg is the 'high' byte register.
10574 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10575 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): elided excerpt -- the `switch (code)' dispatch and
   several case labels/`break's are missing from this view.  */
10579 print_reg (rtx x, int code, FILE *file)
10582 bool duplicated = code == 'd' && TARGET_AVX;
10584 gcc_assert (x == pc_rtx
10585 || (REGNO (x) != ARG_POINTER_REGNUM
10586 && REGNO (x) != FRAME_POINTER_REGNUM
10587 && REGNO (x) != FLAGS_REG
10588 && REGNO (x) != FPSR_REG
10589 && REGNO (x) != FPCR_REG));
10591 if (ASSEMBLER_DIALECT == ASM_ATT)
10596 gcc_assert (TARGET_64BIT);
10597 fputs ("rip", file);
     /* Translate the print-code letter into an effective operand size
	(reused as a byte count below).  */
10601 if (code == 'w' || MMX_REG_P (x))
10603 else if (code == 'b')
10605 else if (code == 'k')
10607 else if (code == 'q')
10609 else if (code == 'y')
10611 else if (code == 'h')
10613 else if (code == 'x')
10615 else if (code == 't')
10618 code = GET_MODE_SIZE (GET_MODE (x));
10620 /* Irritatingly, AMD extended registers use different naming convention
10621 from the normal registers. */
10622 if (REX_INT_REG_P (x))
10624 gcc_assert (TARGET_64BIT);
10628 error ("extended registers have no high halves");
10631 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10634 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10637 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10640 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10643 error ("unsupported operand size for extended register");
10653 if (STACK_TOP_P (x))
     /* 32/64-bit integer regs need the 'e'/'r' size prefix.  */
10662 if (! ANY_FP_REG_P (x))
10663 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10668 reg = hi_reg_name[REGNO (x)];
10671 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10673 reg = qi_reg_name[REGNO (x)];
10676 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10678 reg = qi_high_reg_name[REGNO (x)];
10683 gcc_assert (!duplicated);
10685 fputs (hi_reg_name[REGNO (x)] + 1, file);
10690 gcc_unreachable ();
     /* AVX 'd': emit the register a second time (src = dest form).  */
10696 if (ASSEMBLER_DIALECT == ASM_ATT)
10697 fprintf (file, ", %%%s", reg);
10699 fprintf (file, ", %s", reg);
10703 /* Locate some local-dynamic symbol still in use by this function
10704 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS symbol
   found into cfun->machine->some_ld_name.  NOTE(review): return type,
   `rtx x = *px;' and the return statements are elided here.  */
10708 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10712 if (GET_CODE (x) == SYMBOL_REF
10713 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10715 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic symbol name, or scan the insn stream
   for one.  Asserts (gcc_unreachable) if none exists -- callers only
   ask when a local-dynamic reference is known to be present.  */
10722 static const char *
10723 get_some_local_dynamic_name (void)
10727 if (cfun->machine->some_ld_name)
10728 return cfun->machine->some_ld_name;
     /* Walk every insn pattern; the _1 callback caches the name.  */
10730 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10732 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10733 return cfun->machine->some_ld_name;
10735 gcc_unreachable ();
10738 /* Meaning of CODE:
10739 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10740 C -- print opcode suffix for set/cmov insn.
10741 c -- like C, but print reversed condition
10742 E,e -- likewise, but for compare-and-branch fused insn.
10743 F,f -- likewise, but for floating-point.
10744 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10746 R -- print the prefix for register names.
10747 z -- print the opcode suffix for the size of the current operand.
10748 * -- print a star (in certain assembler syntax)
10749 A -- print an absolute memory reference.
10750 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10751 s -- print a shift double count, followed by the assemblers argument
10753 b -- print the QImode name of the register for the indicated operand.
10754 %b0 would print %al if operands[0] is reg 0.
10755 w -- likewise, print the HImode name of the register.
10756 k -- likewise, print the SImode name of the register.
10757 q -- likewise, print the DImode name of the register.
10758 x -- likewise, print the V4SFmode name of the register.
10759 t -- likewise, print the V8SFmode name of the register.
10760 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10761 y -- print "st(0)" instead of "st" as a register.
10762 d -- print duplicated register operand for AVX instruction.
10763 D -- print condition for SSE cmp instruction.
10764 P -- if PIC, print an @PLT suffix.
10765 X -- don't print any sort of PIC '@' suffix for a symbol.
10766 & -- print some in-use local-dynamic symbol name.
10767 H -- print a memory address offset by 8; used for sse high-parts
10768 Y -- print condition for SSE5 com* instruction.
10769 + -- print a branch hint as 'cs' or 'ds' prefix
10770 ; -- print a semicolon (after prefixes due to bug in older gas).
10774 print_operand (FILE *file, rtx x, int code)
/* Print operand X to FILE, honoring the %-code CODE (see the table in
   the comment above this function).
   NOTE(review): this excerpt is heavily fragmentary -- many case
   labels, braces and declarations are missing -- so the comments below
   only describe the statements that are visible.  */
10781 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' -- emit an in-use local-dynamic TLS symbol name.  */
10786 assemble_name (file, get_some_local_dynamic_name ());
10790 switch (ASSEMBLER_DIALECT)
10797 /* Intel syntax. For absolute addresses, registers should not
10798 be surrounded by braces. */
10802 PRINT_OPERAND (file, x, 0);
10809 gcc_unreachable ();
10812 PRINT_OPERAND (file, x, 0);
/* The following tests guard characters emitted only in AT&T syntax.  */
10817 if (ASSEMBLER_DIALECT == ASM_ATT)
10822 if (ASSEMBLER_DIALECT == ASM_ATT)
10827 if (ASSEMBLER_DIALECT == ASM_ATT)
10832 if (ASSEMBLER_DIALECT == ASM_ATT)
10837 if (ASSEMBLER_DIALECT == ASM_ATT)
10842 if (ASSEMBLER_DIALECT == ASM_ATT)
10847 /* 387 opcodes don't get size suffixes if the operands are
10849 if (STACK_REG_P (x))
10852 /* Likewise if using Intel opcodes. */
10853 if (ASSEMBLER_DIALECT == ASM_INTEL)
10856 /* This is the size of op from size of operand. */
10857 switch (GET_MODE_SIZE (GET_MODE (x)))
10866 #ifdef HAVE_GAS_FILDS_FISTS
10876 if (GET_MODE (x) == SFmode)
10891 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10895 #ifdef GAS_MNEMONICS
10910 gcc_unreachable ();
/* 's' -- shift-double count: print the count operand, then ", ".  */
10927 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10929 PRINT_OPERAND (file, x, 0);
10930 fputs (", ", file);
10935 /* Little bit of braindamage here. The SSE compare instructions
10936 does use completely different names for the comparisons that the
10937 fp conditional moves. */
/* 'D' -- map the RTX comparison code to an SSE cmp* suffix.  */
10940 switch (GET_CODE (x))
10943 fputs ("eq", file);
10946 fputs ("eq_us", file);
10949 fputs ("lt", file);
10952 fputs ("nge", file);
10955 fputs ("le", file);
10958 fputs ("ngt", file);
10961 fputs ("unord", file);
10964 fputs ("neq", file);
10967 fputs ("neq_oq", file);
10970 fputs ("ge", file);
10973 fputs ("nlt", file);
10976 fputs ("gt", file);
10979 fputs ("nle", file);
10982 fputs ("ord", file);
10985 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Second suffix table for a smaller set of comparison codes
   (presumably the non-AVX variant -- confirm against full source).  */
10991 switch (GET_CODE (x))
10995 fputs ("eq", file);
10999 fputs ("lt", file);
11003 fputs ("le", file);
11006 fputs ("unord", file);
11010 fputs ("neq", file);
11014 fputs ("nlt", file);
11018 fputs ("nle", file);
11021 fputs ("ord", file);
11024 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O' -- Sun-assembler cmov size suffix, AT&T dialect only.  */
11030 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11031 if (ASSEMBLER_DIALECT == ASM_ATT)
11033 switch (GET_MODE (x))
11035 case HImode: putc ('w', file); break;
11037 case SFmode: putc ('l', file); break;
11039 case DFmode: putc ('q', file); break;
11040 default: gcc_unreachable ();
/* 'C'/'F' -- set/cmov condition suffix; operand must be a comparison.  */
11047 if (!COMPARISON_P (x))
11049 output_operand_lossage ("operand is neither a constant nor a "
11050 "condition code, invalid operand code "
11054 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11057 if (!COMPARISON_P (x))
11059 output_operand_lossage ("operand is neither a constant nor a "
11060 "condition code, invalid operand code "
11064 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11065 if (ASSEMBLER_DIALECT == ASM_ATT)
11068 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11071 /* Like above, but reverse condition */
11073 /* Check to see if argument to %c is really a constant
11074 and not a condition code which needs to be reversed. */
11075 if (!COMPARISON_P (x))
11077 output_operand_lossage ("operand is neither a constant nor a "
11078 "condition code, invalid operand "
11082 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11085 if (!COMPARISON_P (x))
11087 output_operand_lossage ("operand is neither a constant nor a "
11088 "condition code, invalid operand "
11092 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11093 if (ASSEMBLER_DIALECT == ASM_ATT)
11096 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e' -- fused compare-and-branch conditions always use CCmode.  */
11100 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11104 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11108 /* It doesn't actually matter what mode we use here, as we're
11109 only going to use this for printing. */
/* 'H' -- rewrite the MEM to address its high 8 bytes.  */
11110 x = adjust_address_nv (x, DImode, 8);
/* '+' -- optional branch-prediction prefix, gated on tuning flags.  */
11118 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11121 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11124 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the profile is decisive (outside ~45%-55%).  */
11126 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11127 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11129 int taken = pred_val > REG_BR_PROB_BASE / 2;
11130 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11132 /* Emit hints only in the case default branch prediction
11133 heuristics would fail. */
11134 if (taken != cputaken)
11136 /* We use 3e (DS) prefix for taken branches and
11137 2e (CS) prefix for not taken branches. */
11139 fputs ("ds ; ", file);
11141 fputs ("cs ; ", file);
/* 'Y' -- SSE5 com* comparison suffixes.  */
11149 switch (GET_CODE (x))
11152 fputs ("neq", file);
11155 fputs ("eq", file);
11159 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11163 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11167 fputs ("le", file);
11171 fputs ("lt", file);
11174 fputs ("unord", file);
11177 fputs ("ord", file);
11180 fputs ("ueq", file);
11183 fputs ("nlt", file);
11186 fputs ("nle", file);
11189 fputs ("ule", file);
11192 fputs ("ult", file);
11195 fputs ("une", file);
11198 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';' -- separator after prefixes (works around an old gas bug).  */
11205 fputs (" ; ", file);
11212 output_operand_lossage ("invalid operand code '%c'", code);
/* %-code handled; now print the operand itself by its RTX kind.  */
11217 print_reg (x, code, file);
11219 else if (MEM_P (x))
11221 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11222 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11223 && GET_MODE (x) != BLKmode)
11226 switch (GET_MODE_SIZE (GET_MODE (x)))
11228 case 1: size = "BYTE"; break;
11229 case 2: size = "WORD"; break;
11230 case 4: size = "DWORD"; break;
11231 case 8: size = "QWORD"; break;
11232 case 12: size = "XWORD"; break;
11234 if (GET_MODE (x) == XFmode)
11240 gcc_unreachable ();
11243 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11246 else if (code == 'w')
11248 else if (code == 'k')
11251 fputs (size, file);
11252 fputs (" PTR ", file);
11256 /* Avoid (%rip) for call operands. */
11257 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11258 && !CONST_INT_P (x))
11259 output_addr_const (file, x);
11260 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11261 output_operand_lossage ("invalid constraints for operand");
11263 output_address (x);
11266 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11271 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11272 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11274 if (ASSEMBLER_DIALECT == ASM_ATT)
/* SFmode immediates print as their 32-bit target bit pattern.  */
11276 fprintf (file, "0x%08lx", (long unsigned int) l);
11279 /* These float cases don't actually occur as immediate operands. */
11280 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11284 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11285 fprintf (file, "%s", dstr);
11288 else if (GET_CODE (x) == CONST_DOUBLE
11289 && GET_MODE (x) == XFmode)
11293 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11294 fprintf (file, "%s", dstr);
11299 /* We have patterns that allow zero sets of memory, for instance.
11300 In 64-bit mode, we should probably support all 8-byte vectors,
11301 since we can in fact encode that into an immediate. */
11302 if (GET_CODE (x) == CONST_VECTOR)
11304 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate constants get the dialect-specific prefix ('$' in AT&T,
   "OFFSET FLAT:" for symbols in Intel syntax).  */
11310 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11312 if (ASSEMBLER_DIALECT == ASM_ATT)
11315 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11316 || GET_CODE (x) == LABEL_REF)
11318 if (ASSEMBLER_DIALECT == ASM_ATT)
11321 fputs ("OFFSET FLAT:", file);
11324 if (CONST_INT_P (x))
11325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11327 output_pic_addr_const (file, x, code);
11329 output_addr_const (file, x);
11333 /* Print a memory operand whose address is ADDR. */
11336 print_operand_address (FILE *file, rtx addr)
/* Decompose ADDR into base/index/displacement/scale/segment and print
   it in the current assembler dialect.
   NOTE(review): excerpt is fragmentary; intervening lines (dialect
   branches, braces) are missing.  */
11338 struct ix86_address parts;
11339 rtx base, index, disp;
11341 int ok = ix86_decompose_address (addr, &parts);
11346 index = parts.index;
11348 scale = parts.scale;
/* Non-default segments print an explicit fs:/gs: override.  */
11356 if (ASSEMBLER_DIALECT == ASM_ATT)
11358 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11361 gcc_unreachable ();
11364 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11365 if (TARGET_64BIT && !base && !index)
11369 if (GET_CODE (disp) == CONST
11370 && GET_CODE (XEXP (disp, 0)) == PLUS
11371 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11372 symbol = XEXP (XEXP (disp, 0), 0);
/* RIP-relative form only for labels and non-TLS symbols.  */
11374 if (GET_CODE (symbol) == LABEL_REF
11375 || (GET_CODE (symbol) == SYMBOL_REF
11376 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11379 if (!base && !index)
11381 /* Displacement only requires special attention. */
11383 if (CONST_INT_P (disp))
11385 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11386 fputs ("ds:", file);
11387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11390 output_pic_addr_const (file, disp, 0);
11392 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11396 if (ASSEMBLER_DIALECT == ASM_ATT)
11401 output_pic_addr_const (file, disp, 0);
11402 else if (GET_CODE (disp) == LABEL_REF)
11403 output_asm_label (disp);
11405 output_addr_const (file, disp);
11410 print_reg (base, 0, file);
11414 print_reg (index, 0, file);
11416 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+offset], symbol printed separately.  */
11422 rtx offset = NULL_RTX;
11426 /* Pull out the offset of a symbol; print any symbol itself. */
11427 if (GET_CODE (disp) == CONST
11428 && GET_CODE (XEXP (disp, 0)) == PLUS
11429 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11431 offset = XEXP (XEXP (disp, 0), 1);
11432 disp = gen_rtx_CONST (VOIDmode,
11433 XEXP (XEXP (disp, 0), 0));
11437 output_pic_addr_const (file, disp, 0);
11438 else if (GET_CODE (disp) == LABEL_REF)
11439 output_asm_label (disp);
11440 else if (CONST_INT_P (disp))
11443 output_addr_const (file, disp);
11449 print_reg (base, 0, file);
11452 if (INTVAL (offset) >= 0)
11454 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11458 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11465 print_reg (index, 0, file);
11467 fprintf (file, "*%d", scale);
11475 output_addr_const_extra (FILE *file, rtx x)
/* Print target-specific constant address pieces: TLS relocation
   suffixes (@GOTTPOFF, @TPOFF, ...) for UNSPEC wrappers that
   output_addr_const cannot handle itself.
   NOTE(review): excerpt omits intervening lines (returns, braces).  */
11479 if (GET_CODE (x) != UNSPEC)
11482 op = XVECEXP (x, 0, 0);
11483 switch (XINT (x, 1))
11485 case UNSPEC_GOTTPOFF:
11486 output_addr_const (file, op);
11487 /* FIXME: This might be @TPOFF in Sun ld. */
11488 fputs ("@GOTTPOFF", file);
11491 output_addr_const (file, op);
11492 fputs ("@TPOFF", file);
11494 case UNSPEC_NTPOFF:
11495 output_addr_const (file, op);
/* Relocation name differs between 64-bit and 32-bit TLS ABIs.  */
11497 fputs ("@TPOFF", file);
11499 fputs ("@NTPOFF", file);
11501 case UNSPEC_DTPOFF:
11502 output_addr_const (file, op);
11503 fputs ("@DTPOFF", file);
11505 case UNSPEC_GOTNTPOFF:
11506 output_addr_const (file, op);
11508 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11509 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11511 fputs ("@GOTNTPOFF", file);
11513 case UNSPEC_INDNTPOFF:
11514 output_addr_const (file, op);
11515 fputs ("@INDNTPOFF", file);
11518 case UNSPEC_MACHOPIC_OFFSET:
11519 output_addr_const (file, op);
11521 machopic_output_function_base_name (file);
11532 /* Split one or more DImode RTL references into pairs of SImode
11533 references. The RTL can be REG, offsettable MEM, integer constant, or
11534 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11535 split and "num" is its length. lo_half and hi_half are output arrays
11536 that parallel "operands". */
11539 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* NOTE(review): excerpt is fragmentary -- the loop over NUM and the
   MEM test are not visible here.  */
11543 rtx op = operands[num];
11545 /* simplify_subreg refuse to split volatile memory addresses,
11546 but we still have to handle it. */
/* MEM case: low word at offset 0, high word at offset 4.  */
11549 lo_half[num] = adjust_address (op, SImode, 0);
11550 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: take SImode subregs; VOIDmode constants are treated
   as DImode.  */
11554 lo_half[num] = simplify_gen_subreg (SImode, op,
11555 GET_MODE (op) == VOIDmode
11556 ? DImode : GET_MODE (op), 0);
11557 hi_half[num] = simplify_gen_subreg (SImode, op,
11558 GET_MODE (op) == VOIDmode
11559 ? DImode : GET_MODE (op), 4);
11563 /* Split one or more TImode RTL references into pairs of DImode
11564 references. The RTL can be REG, offsettable MEM, integer constant, or
11565 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11566 split and "num" is its length. lo_half and hi_half are output arrays
11567 that parallel "operands". */
11570 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* TImode analogue of split_di: halves are DImode, high half at
   byte offset 8.  NOTE(review): excerpt omits the loop and MEM test.  */
11574 rtx op = operands[num];
11576 /* simplify_subreg refuse to split volatile memory addresses, but we
11577 still have to handle it. */
11580 lo_half[num] = adjust_address (op, DImode, 0);
11581 hi_half[num] = adjust_address (op, DImode, 8);
11585 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11586 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11591 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11592 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11593 is the expression of the binary operation. The output may either be
11594 emitted here, or returned to the caller, like all output_* functions.
11596 There is no guarantee that the operands are the same mode, as they
11597 might be within FLOAT or FLOAT_EXTEND expressions. */
11599 #ifndef SYSV386_COMPAT
11600 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11601 wants to fix the assemblers because that causes incompatibility
11602 with gcc. No-one wants to fix gcc because that causes
11603 incompatibility with assemblers... You can use the option of
11604 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11605 #define SYSV386_COMPAT 1
11609 output_387_binary_op (rtx insn, rtx *operands)
/* Return the assembler template for the x87/SSE binary FP operation in
   operands[3] (PLUS, MINUS, MULT or DIV) -- see the comment above.
   NOTE(review): excerpt is heavily fragmentary; opcode-name selection
   and several branches are missing, so comments cover only visible
   statements.  */
11611 static char buf[40];
11614 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11616 #ifdef ENABLE_CHECKING
11617 /* Even if we do not want to check the inputs, this documents input
11618 constraints. Which helps in understanding the following code. */
11619 if (STACK_REG_P (operands[0])
11620 && ((REG_P (operands[1])
11621 && REGNO (operands[0]) == REGNO (operands[1])
11622 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11623 || (REG_P (operands[2])
11624 && REGNO (operands[0]) == REGNO (operands[2])
11625 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11626 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11629 gcc_assert (is_sse);
/* Integer-mode source operands select the fi* (integer) opcode forms.  */
11632 switch (GET_CODE (operands[3]))
11635 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11636 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11644 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11645 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11653 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11654 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11662 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11663 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11671 gcc_unreachable ();
/* SSE path: build the scalar ss/sd mnemonic (three-operand form is
   presumably the AVX variant -- confirm against full source).  */
11678 strcpy (buf, ssep);
11679 if (GET_MODE (operands[0]) == SFmode)
11680 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11682 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11686 strcpy (buf, ssep + 1);
11687 if (GET_MODE (operands[0]) == SFmode)
11688 strcat (buf, "ss\t{%2, %0|%0, %2}");
11690 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand-order/popping suffix P.  */
11696 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11700 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11702 rtx temp = operands[2];
11703 operands[2] = operands[1];
11704 operands[1] = temp;
11707 /* know operands[0] == operands[1]. */
11709 if (MEM_P (operands[2]))
11715 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11717 if (STACK_TOP_P (operands[0]))
11718 /* How is it that we are storing to a dead operand[2]?
11719 Well, presumably operands[1] is dead too. We can't
11720 store the result to st(0) as st(0) gets popped on this
11721 instruction. Instead store to operands[2] (which I
11722 think has to be st(1)). st(1) will be popped later.
11723 gcc <= 2.8.1 didn't have this check and generated
11724 assembly code that the Unixware assembler rejected. */
11725 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11727 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11731 if (STACK_TOP_P (operands[0]))
11732 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11734 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters.  */
11739 if (MEM_P (operands[1]))
11745 if (MEM_P (operands[2]))
11751 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11754 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11755 derived assemblers, confusingly reverse the direction of
11756 the operation for fsub{r} and fdiv{r} when the
11757 destination register is not st(0). The Intel assembler
11758 doesn't have this brain damage. Read !SYSV386_COMPAT to
11759 figure out what the hardware really does. */
11760 if (STACK_TOP_P (operands[0]))
11761 p = "{p\t%0, %2|rp\t%2, %0}";
11763 p = "{rp\t%2, %0|p\t%0, %2}";
11765 if (STACK_TOP_P (operands[0]))
11766 /* As above for fmul/fadd, we can't store to st(0). */
11767 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11769 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11774 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11777 if (STACK_TOP_P (operands[0]))
11778 p = "{rp\t%0, %1|p\t%1, %0}";
11780 p = "{p\t%1, %0|rp\t%0, %1}";
11782 if (STACK_TOP_P (operands[0]))
11783 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11785 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11790 if (STACK_TOP_P (operands[0]))
11792 if (STACK_TOP_P (operands[1]))
11793 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11795 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11798 else if (STACK_TOP_P (operands[1]))
11801 p = "{\t%1, %0|r\t%0, %1}";
11803 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11809 p = "{r\t%2, %0|\t%0, %2}";
11811 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11817 gcc_unreachable ();
11824 /* Return needed mode for entity in optimize_mode_switching pass. */
11827 ix86_mode_needed (int entity, rtx insn)
/* Decide which i387 control-word mode INSN requires, for the
   mode-switching optimization.  NOTE(review): excerpt omits the CALL_P
   test and the per-entity switch structure.  */
11829 enum attr_i387_cw mode;
11831 /* The mode UNINITIALIZED is used to store control word after a
11832 function call or ASM pattern. The mode ANY specify that function
11833 has no requirements on the control word and make no changes in the
11834 bits we are interested in. */
/* Calls and asm statements may clobber the control word.  */
11837 || (NONJUMP_INSN_P (insn)
11838 && (asm_noperands (PATTERN (insn)) >= 0
11839 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11840 return I387_CW_UNINITIALIZED;
11842 if (recog_memoized (insn) < 0)
11843 return I387_CW_ANY;
/* Otherwise read the insn's i387_cw attribute and map it to the
   entity being tracked (visible comparisons below).  */
11845 mode = get_attr_i387_cw (insn);
11850 if (mode == I387_CW_TRUNC)
11855 if (mode == I387_CW_FLOOR)
11860 if (mode == I387_CW_CEIL)
11865 if (mode == I387_CW_MASK_PM)
11870 gcc_unreachable ();
11873 return I387_CW_ANY;
11876 /* Output code to initialize control word copies used by trunc?f?i and
11877 rounding patterns. CURRENT_MODE is set to current control word,
11878 while NEW_MODE is set to new control word. */
11881 emit_i387_cw_initialization (int mode)
/* Build a modified copy of the x87 control word for rounding mode MODE
   and store it in the matching stack slot, ready for a later fldcw.
   NOTE(review): excerpt omits some braces and the if/else joining the
   two strategies below.  */
11883 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11886 enum ix86_stack_slot slot;
11888 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word, then edit a copy in REG.  */
11890 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11891 emit_move_insn (reg, copy_rtx (stored_mode));
/* Strategy 1: plain and/or on the HImode copy (used when partial
   register stalls are cheap or we optimize for size).  */
11893 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11894 || optimize_function_for_size_p (cfun))
11898 case I387_CW_TRUNC:
11899 /* round toward zero (truncate) */
11900 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11901 slot = SLOT_CW_TRUNC;
11904 case I387_CW_FLOOR:
11905 /* round down toward -oo */
11906 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11907 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11908 slot = SLOT_CW_FLOOR;
11912 /* round up toward +oo */
11913 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11914 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11915 slot = SLOT_CW_CEIL;
11918 case I387_CW_MASK_PM:
11919 /* mask precision exception for nearbyint() */
11920 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11921 slot = SLOT_CW_MASK_PM;
11925 gcc_unreachable ();
/* Strategy 2: insert the rounding-control nibble with movsi_insv_1,
   avoiding a partial-register operation.  */
11932 case I387_CW_TRUNC:
11933 /* round toward zero (truncate) */
11934 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11935 slot = SLOT_CW_TRUNC;
11938 case I387_CW_FLOOR:
11939 /* round down toward -oo */
11940 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11941 slot = SLOT_CW_FLOOR;
11945 /* round up toward +oo */
11946 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11947 slot = SLOT_CW_CEIL;
11950 case I387_CW_MASK_PM:
11951 /* mask precision exception for nearbyint() */
11952 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11953 slot = SLOT_CW_MASK_PM;
11957 gcc_unreachable ();
/* Spill the edited control word to its dedicated slot.  */
11961 gcc_assert (slot < MAX_386_STACK_LOCALS);
11963 new_mode = assign_386_stack_local (HImode, slot);
11964 emit_move_insn (new_mode, reg);
11967 /* Output code for INSN to convert a float to a signed int. OPERANDS
11968 are the insn operands. The output may be [HSD]Imode and the input
11969 operand may be [SDX]Fmode. */
11972 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
/* Emit the fist/fistp (or SSE3 fisttp) sequence; FISTTP selects the
   truncating SSE3 form that needs no control-word dance.
   NOTE(review): excerpt omits some braces/else lines.  */
11974 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11975 int dimode_p = GET_MODE (operands[0]) == DImode;
11976 int round_mode = get_attr_i387_cw (insn);
11978 /* Jump through a hoop or two for DImode, since the hardware has no
11979 non-popping instruction. We used to do this a different way, but
11980 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
11981 if ((dimode_p || fisttp) && !stack_top_dies)
11982 output_asm_insn ("fld\t%y1", operands);
11984 gcc_assert (STACK_TOP_P (operands[1]));
11985 gcc_assert (MEM_P (operands[0]));
11986 gcc_assert (GET_MODE (operands[1]) != TFmode);
11989 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic x87 path: switch rounding mode around the store via the
   control-word slots prepared by emit_i387_cw_initialization.  */
11992 if (round_mode != I387_CW_ANY)
11993 output_asm_insn ("fldcw\t%3", operands);
11994 if (stack_top_dies || dimode_p)
11995 output_asm_insn ("fistp%z0\t%0", operands);
11997 output_asm_insn ("fist%z0\t%0", operands);
11998 if (round_mode != I387_CW_ANY)
11999 output_asm_insn ("fldcw\t%2", operands);
12005 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12006 have the values zero or one, indicates the ffreep insn's operand
12007 from the OPERANDS array. */
12009 static const char *
12010 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
/* NOTE(review): excerpt omits #else/#endif and the `return retval;`
   joining the three branches below.  */
12012 if (TARGET_USE_FFREEP)
12013 #if HAVE_AS_IX86_FFREEP
12014 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding via .word, patching
   the placeholder digit with the stack-register number.  */
12017 static char retval[] = ".word\t0xc_df";
12018 int regno = REGNO (operands[opno]);
12020 gcc_assert (FP_REGNO_P (regno));
/* Index 9 is the '_' placeholder in the template above.  */
12022 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not desirable: a plain popping store.  */
12027 return opno ? "fstp\t%y1" : "fstp\t%y0";
12031 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12032 should be used. UNORDERED_P is true when fucom should be used. */
12035 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
/* Return the assembler template for an x87 or SSE scalar FP compare.
   NOTE(review): excerpt omits several braces and if/else joins.  */
12037 int stack_top_dies;
12038 rtx cmp_op0, cmp_op1;
12039 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags (fcomi) and fnstsw forms.  */
12043 cmp_op0 = operands[0];
12044 cmp_op1 = operands[1];
12048 cmp_op0 = operands[1];
12049 cmp_op1 = operands[2];
/* SSE path: [v]ucomis[sd]/[v]comis[sd]; skip the leading 'v' when AVX
   is not enabled.  */
12054 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12055 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12056 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12057 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12059 if (GET_MODE (operands[0]) == SFmode)
12061 return &ucomiss[TARGET_AVX ? 0 : 1];
12063 return &comiss[TARGET_AVX ? 0 : 1];
12066 return &ucomisd[TARGET_AVX ? 0 : 1];
12068 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be st(0).  */
12071 gcc_assert (STACK_TOP_P (cmp_op0));
12073 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) if it dies.  */
12075 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12077 if (stack_top_dies)
12079 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12080 return output_387_ffreep (operands, 1);
12083 return "ftst\n\tfnstsw\t%0";
12086 if (STACK_REG_P (cmp_op1)
12088 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12089 && REGNO (cmp_op1) != FIRST_STACK_REG)
12091 /* If both the top of the 387 stack dies, and the other operand
12092 is also a stack register that dies, then this must be a
12093 `fcompp' float compare */
12097 /* There is no double popping fcomi variant. Fortunately,
12098 eflags is immune from the fstp's cc clobbering. */
12100 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12102 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12103 return output_387_ffreep (operands, 0);
12108 return "fucompp\n\tfnstsw\t%0";
12110 return "fcompp\n\tfnstsw\t%0";
12115 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12117 static const char * const alt[16] =
12119 "fcom%z2\t%y2\n\tfnstsw\t%0",
12120 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12121 "fucom%z2\t%y2\n\tfnstsw\t%0",
12122 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12124 "ficom%z2\t%y2\n\tfnstsw\t%0",
12125 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12129 "fcomi\t{%y1, %0|%0, %y1}",
12130 "fcomip\t{%y1, %0|%0, %y1}",
12131 "fucomi\t{%y1, %0|%0, %y1}",
12132 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT from the insn's properties.  */
12143 mask = eflags_p << 3;
12144 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12145 mask |= unordered_p << 1;
12146 mask |= stack_top_dies;
12148 gcc_assert (mask < 16);
/* Emit one element of an absolute jump-table: ".long L<value>"
   (".quad" on 64-bit).  NOTE(review): excerpt omits the surrounding
   if/else lines.  */
12157 ix86_output_addr_vec_elt (FILE *file, int value)
12159 const char *directive = ASM_LONG;
12163 directive = ASM_QUAD;
12165 gcc_assert (!TARGET_64BIT);
12168 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC (relative) jump-table: a label difference,
   a @GOTOFF reference, or a GOT-relative expression, depending on the
   target.  NOTE(review): excerpt omits some braces.  */
12172 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12174 const char *directive = ASM_LONG;
12177 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12178 directive = ASM_QUAD;
12180 gcc_assert (!TARGET_64BIT);
12182 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12183 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12184 fprintf (file, "%s%s%d-%s%d\n",
12185 directive, LPREFIX, value, LPREFIX, rel);
12186 else if (HAVE_AS_GOTOFF_IN_DATA)
12187 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12189 else if (TARGET_MACHO)
12191 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12192 machopic_output_function_base_name (file);
12193 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12197 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12198 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12201 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12205 ix86_expand_clear (rtx dest)
/* Emit the cheapest way to zero DEST after reload.
   NOTE(review): excerpt omits the final emit_insn.  */
12209 /* We play register width games, which are only valid after reload. */
12210 gcc_assert (reload_completed);
12212 /* Avoid HImode and its attendant prefix byte. */
12213 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12214 dest = gen_rtx_REG (SImode, REGNO (dest));
12215 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx)
12217 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12218 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor clobbers the flags, so wrap the SET in a PARALLEL with a
   FLAGS_REG clobber.  */
12220 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12221 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12227 /* X is an unchanging MEM. If it is a constant pool reference, return
12228 the constant pool rtx, else NULL. */
12231 maybe_get_pool_constant (rtx x)
/* Strip any PIC wrapping from the address, then look it up in the
   constant pool.  */
12233 x = ix86_delegitimize_address (XEXP (x, 0));
12235 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12236 return get_pool_constant (x);
12242 ix86_expand_move (enum machine_mode mode, rtx operands[])
/* Expander for ordinary moves: legitimizes TLS, dllimport, PIC and
   large-constant sources before emitting the final SET.
   NOTE(review): excerpt is fragmentary; op0/op1 initialization and
   several braces are missing.  */
12245 enum tls_model model;
/* TLS symbols must be legitimized into model-specific sequences.  */
12250 if (GET_CODE (op1) == SYMBOL_REF)
12252 model = SYMBOL_REF_TLS_MODEL (op1);
12255 op1 = legitimize_tls_address (op1, model, true);
12256 op1 = force_operand (op1, op0);
12260 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12261 && SYMBOL_REF_DLLIMPORT_P (op1))
12262 op1 = legitimize_dllimport_symbol (op1, false);
/* Same handling for (const (plus symbol offset)) sources.  */
12264 else if (GET_CODE (op1) == CONST
12265 && GET_CODE (XEXP (op1, 0)) == PLUS
12266 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12268 rtx addend = XEXP (XEXP (op1, 0), 1);
12269 rtx symbol = XEXP (XEXP (op1, 0), 0);
12272 model = SYMBOL_REF_TLS_MODEL (symbol);
12274 tmp = legitimize_tls_address (symbol, model, true);
12275 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12276 && SYMBOL_REF_DLLIMPORT_P (symbol))
12277 tmp = legitimize_dllimport_symbol (symbol, true);
12281 tmp = force_operand (tmp, NULL);
12282 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12283 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses need a GOT-based rewrite.  */
12289 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12291 if (TARGET_MACHO && !TARGET_64BIT)
12296 rtx temp = ((reload_in_progress
12297 || ((op0 && REG_P (op0))
12299 ? op0 : gen_reg_rtx (Pmode));
12300 op1 = machopic_indirect_data_reference (op1, temp);
12301 op1 = machopic_legitimize_pic_address (op1, mode,
12302 temp == op1 ? 0 : temp);
12304 else if (MACHOPIC_INDIRECT)
12305 op1 = machopic_indirect_data_reference (op1, 0);
12313 op1 = force_reg (Pmode, op1);
12314 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12316 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12317 op1 = legitimize_pic_address (op1, reg);
/* Mem-to-mem and other unsupported shapes: force source to a reg.  */
12326 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12327 || !push_operand (op0, mode))
12329 op1 = force_reg (mode, op1);
12331 if (push_operand (op0, mode)
12332 && ! general_no_elim_operand (op1, mode))
12333 op1 = copy_to_mode_reg (mode, op1);
12335 /* Force large constants in 64bit compilation into register
12336 to get them CSEed. */
12337 if (can_create_pseudo_p ()
12338 && (mode == DImode) && TARGET_64BIT
12339 && immediate_operand (op1, mode)
12340 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12341 && !register_operand (op0, mode)
12343 op1 = copy_to_mode_reg (mode, op1);
12345 if (can_create_pseudo_p ()
12346 && FLOAT_MODE_P (mode)
12347 && GET_CODE (op1) == CONST_DOUBLE)
12349 /* If we are loading a floating point constant to a register,
12350 force the value to memory now, since we'll get better code
12351 out the back end. */
12353 op1 = validize_mem (force_const_mem (mode, op1));
12354 if (!register_operand (op0, mode))
12356 rtx temp = gen_reg_rtx (mode);
12357 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12358 emit_move_insn (op0, temp);
/* Everything legitimized: emit the move itself.  */
12364 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12368 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
/* Expander for vector-mode moves: forces awkward constants to memory
   and routes under-aligned SSE operands through the misaligned-move
   path.  NOTE(review): excerpt omits some braces and returns.  */
12370 rtx op0 = operands[0], op1 = operands[1];
12371 unsigned int align = GET_MODE_ALIGNMENT (mode);
12373 /* Force constants other than zero into memory. We do not know how
12374 the instructions used to build constants modify the upper 64 bits
12375 of the register, once we have that information we may be able
12376 to handle some of them more efficiently. */
12377 if (can_create_pseudo_p ()
12378 && register_operand (op0, mode)
12379 && (CONSTANT_P (op1)
12380 || (GET_CODE (op1) == SUBREG
12381 && CONSTANT_P (SUBREG_REG (op1))))
12382 && standard_sse_constant_p (op1) <= 0)
12383 op1 = validize_mem (force_const_mem (mode, op1));
12385 /* We need to check memory alignment for SSE mode since attribute
12386 can make operands unaligned. */
12387 if (can_create_pseudo_p ()
12388 && SSE_REG_MODE_P (mode)
12389 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12390 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12394 /* ix86_expand_vector_move_misalign() does not like constants ... */
12395 if (CONSTANT_P (op1)
12396 || (GET_CODE (op1) == SUBREG
12397 && CONSTANT_P (SUBREG_REG (op1))))
12398 op1 = validize_mem (force_const_mem (mode, op1));
12400 /* ... nor both arguments in memory. */
12401 if (!register_operand (op0, mode)
12402 && !register_operand (op1, mode))
12403 op1 = force_reg (mode, op1);
12405 tmp[0] = op0; tmp[1] = op1;
12406 ix86_expand_vector_move_misalign (mode, tmp);
12410 /* Make operand1 a register if it isn't already. */
12411 if (can_create_pseudo_p ()
12412 && !register_operand (op0, mode)
12413 && !register_operand (op1, mode))
12415 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12419 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12422 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12423 straight to ix86_expand_vector_move. */
12424 /* Code generation for scalar reg-reg moves of single and double precision data:
12425 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12429 if (x86_sse_partial_reg_dependency == true)
12434 Code generation for scalar loads of double precision data:
12435 if (x86_sse_split_regs == true)
12436 movlpd mem, reg (gas syntax)
12440 Code generation for unaligned packed loads of single precision data
12441 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12442 if (x86_sse_unaligned_move_optimal)
12445 if (x86_sse_partial_reg_dependency == true)
12457 Code generation for unaligned packed loads of double precision data
12458 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12459 if (x86_sse_unaligned_move_optimal)
12462 if (x86_sse_split_regs == true)
/* Emit an unaligned vector load or store OPERANDS[0] := OPERANDS[1] in
   MODE, choosing among movdqu/movups/movupd (AVX or SSE forms) and the
   split low/high half sequences depending on target tuning flags, as
   described in the comment block above.  */
12475 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
      /* AVX path: a single unaligned move covers both 128- and 256-bit
         modes (dispatch on mode class and size below).  */
12484   switch (GET_MODE_CLASS (mode))
12486     case MODE_VECTOR_INT:
12488       switch (GET_MODE_SIZE (mode))
12491           op0 = gen_lowpart (V16QImode, op0);
12492           op1 = gen_lowpart (V16QImode, op1);
12493           emit_insn (gen_avx_movdqu (op0, op1));
12496           op0 = gen_lowpart (V32QImode, op0);
12497           op1 = gen_lowpart (V32QImode, op1);
12498           emit_insn (gen_avx_movdqu256 (op0, op1));
12501           gcc_unreachable ();
12504     case MODE_VECTOR_FLOAT:
12505       op0 = gen_lowpart (mode, op0);
12506       op1 = gen_lowpart (mode, op1);
12511           emit_insn (gen_avx_movups (op0, op1));
12514           emit_insn (gen_avx_movups256 (op0, op1));
12517           emit_insn (gen_avx_movupd (op0, op1));
12520           emit_insn (gen_avx_movupd256 (op0, op1));
12523           gcc_unreachable ();
12528       gcc_unreachable ();
      /* Non-AVX path, unaligned load (op1 in memory).  */
12536       /* If we're optimizing for size, movups is the smallest. */
12537       if (optimize_insn_for_size_p ())
12539           op0 = gen_lowpart (V4SFmode, op0);
12540           op1 = gen_lowpart (V4SFmode, op1);
12541           emit_insn (gen_sse_movups (op0, op1));
12545       /* ??? If we have typed data, then it would appear that using
12546          movdqu is the only way to get unaligned data loaded with
12548       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12550           op0 = gen_lowpart (V16QImode, op0);
12551           op1 = gen_lowpart (V16QImode, op1);
12552           emit_insn (gen_sse2_movdqu (op0, op1));
12556       if (TARGET_SSE2 && mode == V2DFmode)
12560           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12562               op0 = gen_lowpart (V2DFmode, op0);
12563               op1 = gen_lowpart (V2DFmode, op1);
12564               emit_insn (gen_sse2_movupd (op0, op1));
12568           /* When SSE registers are split into halves, we can avoid
12569              writing to the top half twice. */
12570           if (TARGET_SSE_SPLIT_REGS)
12572               emit_clobber (op0);
12577           /* ??? Not sure about the best option for the Intel chips.
12578              The following would seem to satisfy; the register is
12579              entirely cleared, breaking the dependency chain. We
12580              then store to the upper half, with a dependency depth
12581              of one. A rumor has it that Intel recommends two movsd
12582              followed by an unpacklpd, but this is unconfirmed. And
12583              given that the dependency depth of the unpacklpd would
12584              still be one, I'm not sure why this would be better. */
12585           zero = CONST0_RTX (V2DFmode);
          /* Load the two DFmode halves with loadlpd/loadhpd.  */
12588           m = adjust_address (op1, DFmode, 0);
12589           emit_insn (gen_sse2_loadlpd (op0, zero, m));
12590           m = adjust_address (op1, DFmode, 8);
12591           emit_insn (gen_sse2_loadhpd (op0, op0, m));
12595           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12597               op0 = gen_lowpart (V4SFmode, op0);
12598               op1 = gen_lowpart (V4SFmode, op1);
12599               emit_insn (gen_sse_movups (op0, op1));
          /* Break the false dependency on the destination either by
             zeroing it or by clobbering it, per tuning.  */
12603           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12604             emit_move_insn (op0, CONST0_RTX (mode));
12606             emit_clobber (op0);
12608           if (mode != V4SFmode)
12609             op0 = gen_lowpart (V4SFmode, op0);
12610           m = adjust_address (op1, V2SFmode, 0);
12611           emit_insn (gen_sse_loadlps (op0, op0, m));
12612           m = adjust_address (op1, V2SFmode, 8);
12613           emit_insn (gen_sse_loadhps (op0, op0, m));
      /* Non-AVX path, unaligned store (op0 in memory).  */
12616   else if (MEM_P (op0))
12618       /* If we're optimizing for size, movups is the smallest. */
12619       if (optimize_insn_for_size_p ())
12621           op0 = gen_lowpart (V4SFmode, op0);
12622           op1 = gen_lowpart (V4SFmode, op1);
12623           emit_insn (gen_sse_movups (op0, op1));
12627       /* ??? Similar to above, only less clear because of quote
12628          typeless stores unquote. */
12629       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12630           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12632           op0 = gen_lowpart (V16QImode, op0);
12633           op1 = gen_lowpart (V16QImode, op1);
12634           emit_insn (gen_sse2_movdqu (op0, op1));
12638       if (TARGET_SSE2 && mode == V2DFmode)
12640           m = adjust_address (op0, DFmode, 0);
12641           emit_insn (gen_sse2_storelpd (m, op1));
12642           m = adjust_address (op0, DFmode, 8);
12643           emit_insn (gen_sse2_storehpd (m, op1));
12647           if (mode != V4SFmode)
12648             op1 = gen_lowpart (V4SFmode, op1);
12649           m = adjust_address (op0, V2SFmode, 0);
12650           emit_insn (gen_sse_storelps (m, op1));
12651           m = adjust_address (op0, V2SFmode, 8);
12652           emit_insn (gen_sse_storehps (m, op1));
12656     gcc_unreachable ()
12659 /* Expand a push in MODE. This is some mode for which we do not support
12660 proper push instructions, at least from the registers that we expect
12661 the value to live in. */
/* Emit a push of X in MODE by explicitly decrementing the stack pointer
   and storing through it (used when no real push instruction fits).  */
12664 ix86_expand_push (enum machine_mode mode, rtx x)
      /* sp -= GET_MODE_SIZE (mode); expand_simple_binop may or may not
         target the stack pointer directly.  */
12668   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12669                              GEN_INT (-GET_MODE_SIZE (mode)),
12670                              stack_pointer_rtx, 1, OPTAB_DIRECT);
12671   if (tmp != stack_pointer_rtx)
12672     emit_move_insn (stack_pointer_rtx, tmp);
12674   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12676   /* When we push an operand onto stack, it has to be aligned at least
12677      at the function argument boundary. However since we don't have
12678      the argument type, we can't determine the actual argument
12680   emit_move_insn (tmp, x)
12683 /* Helper function of ix86_fixup_binary_operands to canonicalize
12684 operand order. Returns true if the operands should be swapped. */
/* Decide whether OPERANDS[1] and OPERANDS[2] of commutative CODE should
   be swapped: prefer src1 == dst, then immediates second, then memory
   second.  Returns true if a swap is wanted.  */
12687 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12690   rtx dst = operands[0];
12691   rtx src1 = operands[1];
12692   rtx src2 = operands[2];
12694   /* If the operation is not commutative, we can't do anything. */
12695   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12698   /* Highest priority is that src1 should match dst. */
12699   if (rtx_equal_p (dst, src1))
12701   if (rtx_equal_p (dst, src2))
12704   /* Next highest priority is that immediate constants come second. */
12705   if (immediate_operand (src2, mode))
12707   if (immediate_operand (src1, mode))
12710   /* Lowest priority is that memory references should come second. */
12720 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12721 destination to use for the operation. If different from the true
12722 destination in operands[0], a copy operation will be required. */
/* Canonicalize OPERANDS for a two-address binary CODE in MODE so that
   ix86_binary_operator_ok accepts them; returns the destination to use
   (a fresh pseudo when operands[0] cannot be targeted directly, in which
   case the caller must copy the result back).  */
12725 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12728   rtx dst = operands[0];
12729   rtx src1 = operands[1];
12730   rtx src2 = operands[2];
12732   /* Canonicalize operand order. */
12733   if (ix86_swap_binary_operands_p (code, mode, operands))
12737       /* It is invalid to swap operands of different modes. */
12738       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12745   /* Both source operands cannot be in memory. */
12746   if (MEM_P (src1) && MEM_P (src2))
12748       /* Optimization: Only read from memory once. */
12749       if (rtx_equal_p (src1, src2))
12751           src2 = force_reg (mode, src2);
12755         src2 = force_reg (mode, src2);
12758   /* If the destination is memory, and we do not have matching source
12759      operands, do things in registers. */
12760   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12761     dst = gen_reg_rtx (mode);
12763   /* Source 1 cannot be a constant. */
12764   if (CONSTANT_P (src1))
12765     src1 = force_reg (mode, src1);
12767   /* Source 1 cannot be a non-matching memory. */
12768   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12769     src1 = force_reg (mode, src1);
12771   operands[1] = src1;
12772   operands[2] = src2
12776 /* Similarly, but assume that the destination has already been
12777 set up properly. */
/* As ix86_fixup_binary_operands, but assert that no copy-back is needed
   (the canonicalized destination must equal operands[0]).  */
12780 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12781                                     enum machine_mode mode, rtx operands[])
12783   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12784   gcc_assert (dst == operands[0])
12787 /* Attempt to expand a binary operator. Make the expansion closer to the
12788 actual machine, then just general_operand, which will allow 3 separate
12789 memory references (one output, two input) in a single insn. */
/* Expand dst = src1 CODE src2, fixing up operands first and clobbering
   the flags register except while reload is in progress (where only
   PLUS, which has a no-clobber form, is permitted).  */
12792 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12795   rtx src1, src2, dst, op, clob;
12797   dst = ix86_fixup_binary_operands (code, mode, operands);
12798   src1 = operands[1];
12799   src2 = operands[2];
12801   /* Emit the instruction. */
12803   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12804   if (reload_in_progress)
12806       /* Reload doesn't know about the flags register, and doesn't know that
12807          it doesn't want to clobber it. We can only do this with PLUS. */
12808       gcc_assert (code == PLUS);
12813       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12814       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12817   /* Fix up the destination if needed. */
12818   if (dst != operands[0])
12819     emit_move_insn (operands[0], dst)
12822 /* Return TRUE or FALSE depending on whether the binary operator meets the
12823 appropriate constraints. */
/* Predicate: do OPERANDS satisfy the two-address constraints for binary
   CODE in MODE (after canonical swapping)?  Mirrors the fixups applied
   by ix86_fixup_binary_operands.  */
12826 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12829   rtx dst = operands[0];
12830   rtx src1 = operands[1];
12831   rtx src2 = operands[2];
12833   /* Both source operands cannot be in memory. */
12834   if (MEM_P (src1) && MEM_P (src2))
12837   /* Canonicalize operand order for commutative operators. */
12838   if (ix86_swap_binary_operands_p (code, mode, operands))
12845   /* If the destination is memory, we must have a matching source operand. */
12846   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12849   /* Source 1 cannot be a constant. */
12850   if (CONSTANT_P (src1))
12853   /* Source 1 cannot be a non-matching memory. */
12854   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12860 /* Attempt to expand a unary operator. Make the expansion closer to the
12861 actual machine, then just general_operand, which will allow 2 separate
12862 memory references (one output, one input) in a single insn. */
/* Expand dst = CODE src for a unary operator, clobbering flags except
   for NOT (the one-operand form that does not touch flags, also the
   only form reload may see here).  */
12865 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12868   int matching_memory;
12869   rtx src, dst, op, clob;
12874   /* If the destination is memory, and we do not have matching source
12875      operands, do things in registers. */
12876   matching_memory = 0;
12879       if (rtx_equal_p (dst, src))
12880         matching_memory = 1;
12882         dst = gen_reg_rtx (mode);
12885   /* When source operand is memory, destination must match. */
12886   if (MEM_P (src) && !matching_memory)
12887     src = force_reg (mode, src);
12889   /* Emit the instruction. */
12891   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12892   if (reload_in_progress || code == NOT)
12894       /* Reload doesn't know about the flags register, and doesn't know that
12895          it doesn't want to clobber it. */
12896       gcc_assert (code == NOT);
12901       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12902       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12905   /* Fix up the destination if needed. */
12906   if (dst != operands[0])
12907     emit_move_insn (operands[0], dst)
12910 /* Return TRUE or FALSE depending on whether the unary operator meets the
12911 appropriate constraints. */
/* Predicate: unary-operator operands are OK unless memory is involved
   on either side without source and destination matching.  */
12914 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12915                         enum machine_mode mode ATTRIBUTE_UNUSED,
12916                         rtx operands[2] ATTRIBUTE_UNUSED)
12918   /* If one of operands is memory, source and destination must match. */
12919   if ((MEM_P (operands[0])
12920        || MEM_P (operands[1]))
12921       && ! rtx_equal_p (operands[0], operands[1]))
12926 /* Post-reload splitter for converting an SF or DFmode value in an
12927 SSE register into an unsigned SImode. */
/* Post-reload splitter body: convert an SF/DF value held in an SSE
   register into unsigned SImode.  OPERANDS are: 0 = result, 1 = LARGE
   scratch, 2 = ZERO_OR_TWO31 scratch, 3 = INPUT, 4 = constant 2**31.
   The trick: subtract 2**31 when INPUT >= 2**31, do a signed cvtt, then
   re-set the sign bit via XOR with LARGE << 31.  */
12930 ix86_split_convert_uns_si_sse (rtx operands[])
12932   enum machine_mode vecmode;
12933   rtx value, large, zero_or_two31, input, two31, x;
12935   large = operands[1];
12936   zero_or_two31 = operands[2];
12937   input = operands[3];
12938   two31 = operands[4];
12939   vecmode = GET_MODE (large);
12940   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12942   /* Load up the value into the low element. We must ensure that the other
12943      elements are valid floats -- zero is the easiest such value. */
12946       if (vecmode == V4SFmode)
12947         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12949         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
      /* INPUT already lives in an SSE register: zero VALUE and movss/movsd
         the scalar in.  */
12953       input = gen_rtx_REG (vecmode, REGNO (input));
12954       emit_move_insn (value, CONST0_RTX (vecmode));
12955       if (vecmode == V4SFmode)
12956         emit_insn (gen_sse_movss (value, value, input));
12958         emit_insn (gen_sse2_movsd (value, value, input));
12961   emit_move_insn (large, two31);
12962   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
      /* large = (2**31 <= value) ? all-ones : 0 (vector compare mask).  */
12964   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12965   emit_insn (gen_rtx_SET (VOIDmode, large, x));
12967   x = gen_rtx_AND (vecmode, zero_or_two31, large);
12968   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12970   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12971   emit_insn (gen_rtx_SET (VOIDmode, value, x));
      /* Shift the mask into the sign-bit position for the final XOR.  */
12973   large = gen_rtx_REG (V4SImode, REGNO (large));
12974   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12976   x = gen_rtx_REG (V4SImode, REGNO (value));
12977   if (vecmode == V4SFmode)
12978     emit_insn (gen_sse2_cvttps2dq (x, value));
12980     emit_insn (gen_sse2_cvttpd2dq (x, value));
12983   emit_insn (gen_xorv4si3 (value, value, large))
12986 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12987 Expects the 64-bit DImode to be supplied in a pair of integral
12988 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12989 -mfpmath=sse, !optimize_size only. */
/* Convert unsigned DImode INPUT to DFmode TARGET using SSE only: build
   two biased doubles (0x1.0p52 + lo32, 0x1.0p84 + hi32) by splicing
   exponent words, subtract the biases, then add the halves.  */
12992 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12994   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12995   rtx int_xmm, fp_xmm;
12996   rtx biases, exponents;
12999   int_xmm = gen_reg_rtx (V4SImode);
13000   if (TARGET_INTER_UNIT_MOVES)
13001     emit_insn (gen_movdi_to_sse (int_xmm, input));
13002   else if (TARGET_SSE_SPLIT_REGS)
13004       emit_clobber (int_xmm);
13005       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
      /* Fallback: build the vector through a V2DI pseudo.  */
13009       x = gen_reg_rtx (V2DImode);
13010       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13011       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13014   x = gen_rtx_CONST_VECTOR (V4SImode,
13015                             gen_rtvec (4, GEN_INT (0x43300000UL),
13016                                        GEN_INT (0x45300000UL),
13017                                        const0_rtx, const0_rtx));
13018   exponents = validize_mem (force_const_mem (V4SImode, x));
13020   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13021   emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13023   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13024      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13025      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13026      (0x1.0p84 + double(fp_value_hi_xmm)).
13027      Note these exponents differ by 32. */
13029   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13031   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13032      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13033   real_ldexp (&bias_lo_rvt, &dconst1, 52);
13034   real_ldexp (&bias_hi_rvt, &dconst1, 84);
13035   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13036   x = const_double_from_real_value (bias_hi_rvt, DFmode);
13037   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13038   biases = validize_mem (force_const_mem (V2DFmode, biases));
13039   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13041   /* Add the upper and lower DFmode values together. */
      /* SSE3 haddpd does it in one insn; otherwise unpack-high and add.  */
13043     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13046       x = copy_to_mode_reg (V2DFmode, fp_xmm);
13047       emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13048       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13051   ix86_expand_vector_extract (false, target, fp_xmm, 0)
13054 /* Not used, but eases macroization of patterns. */
/* Stub that must never be called; it exists only so the macroized
   expander patterns have a uniform set of helper names.  */
13056 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13057                                   rtx input ATTRIBUTE_UNUSED)
13059   gcc_unreachable ()
13062 /* Convert an unsigned SImode value into a DFmode. Only currently used
13063 for SSE, but applicable anywhere. */
/* Convert unsigned SImode INPUT to DFmode TARGET: flip the sign bit by
   adding -2**31 (wrapping), signed-convert, then add 2**31.0 back.  */
13066 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13068   REAL_VALUE_TYPE TWO31r;
13071   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13072                            NULL, 1, OPTAB_DIRECT);
13074   fp = gen_reg_rtx (DFmode);
13075   emit_insn (gen_floatsidf2 (fp, x));
13077   real_ldexp (&TWO31r, &dconst1, 31);
13078   x = const_double_from_real_value (TWO31r, DFmode);
13080   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13082     emit_move_insn (target, x)
13085 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13086 32-bit mode; otherwise we have a direct convert instruction. */
/* Convert signed DImode INPUT to DFmode TARGET on 32-bit SSE targets:
   result = (double)hi32 * 2**32 + (double)(unsigned)lo32.  */
13089 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13091   REAL_VALUE_TYPE TWO32r;
13092   rtx fp_lo, fp_hi, x;
13094   fp_lo = gen_reg_rtx (DFmode);
13095   fp_hi = gen_reg_rtx (DFmode);
13097   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13099   real_ldexp (&TWO32r, &dconst1, 32);
13100   x = const_double_from_real_value (TWO32r, DFmode);
13101   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
      /* The low half is treated as unsigned.  */
13103   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13105   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13108     emit_move_insn (target, x)
13111 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13112 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Convert unsigned SImode INPUT to SFmode TARGET using SSE only:
   split into 16-bit halves (each exactly representable in float),
   convert both signed, then combine as hi * 2**16 + lo.  */
13114 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13116   REAL_VALUE_TYPE ONE16r;
13117   rtx fp_hi, fp_lo, int_hi, int_lo, x;
13119   real_ldexp (&ONE16r, &dconst1, 16);
13120   x = const_double_from_real_value (ONE16r, SFmode);
13121   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13122                                 NULL, 0, OPTAB_DIRECT);
13123   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13124                                 NULL, 0, OPTAB_DIRECT);
13125   fp_hi = gen_reg_rtx (SFmode);
13126   fp_lo = gen_reg_rtx (SFmode);
13127   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13128   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13129   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13131   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13133   if (!rtx_equal_p (target, fp_hi))
13134     emit_move_insn (target, fp_hi)
13137 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
13138 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in MODE holding VALUE: replicated across all
   elements when VECT is true, otherwise VALUE in element 0 with the
   remaining elements zero (float modes) — dispatch on MODE below.  */
13142 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13149       v = gen_rtvec (4, value, value, value, value);
13150       return gen_rtx_CONST_VECTOR (V4SImode, v);
13154       v = gen_rtvec (2, value, value);
13155       return gen_rtx_CONST_VECTOR (V2DImode, v);
13159         v = gen_rtvec (4, value, value, value, value);
13161         v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13162                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13163       return gen_rtx_CONST_VECTOR (V4SFmode, v);
13167         v = gen_rtvec (2, value, value);
13169         v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13170       return gen_rtx_CONST_VECTOR (V2DFmode, v);
13173       gcc_unreachable ()
13177 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13178 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13179 for an SSE register. If VECT is true, then replicate the mask for
13180 all elements of the vector register. If INVERT is true, then create
13181 a mask excluding the sign bit. */
/* Build the sign-bit mask for MODE in an SSE register: bit pattern with
   only the sign bit set (or everything BUT the sign bit, when INVERT),
   replicated across the vector when VECT.  Returns a hard/pseudo reg
   holding the mask.  */
13184 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13186   enum machine_mode vec_mode, imode;
13187   HOST_WIDE_INT hi, lo;
13192   /* Find the sign bit, sign extended to 2*HWI. */
      /* 32-bit elements: sign bit is bit 31.  */
13198       vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13199       lo = 0x80000000, hi = lo < 0;
      /* 64-bit elements: sign bit is bit 63; split across lo/hi when the
         host wide int is only 32 bits.  */
13205       vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13206       if (HOST_BITS_PER_WIDE_INT >= 64)
13207         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13209         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      /* TF/extended modes: no vector mode, mask built in an integer reg.  */
13214       vec_mode = VOIDmode;
13215       if (HOST_BITS_PER_WIDE_INT >= 64)
13218           lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13225           lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13229             lo = ~lo, hi = ~hi;
13235           mask = immed_double_const (lo, hi, imode);
13237           vec = gen_rtvec (2, v, mask);
13238           v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13239           v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13246       gcc_unreachable ();
      /* Invert the mask (select everything except the sign bit).  */
13250     lo = ~lo, hi = ~hi;
13252   /* Force this value into the low part of a fp vector constant. */
13253   mask = immed_double_const (lo, hi, imode);
13254   mask = gen_lowpart (mode, mask);
13256   if (vec_mode == VOIDmode)
13257     return force_reg (mode, mask);
13259   v = ix86_build_const_vector (mode, vect, mask);
13260   return force_reg (vec_mode, v)
13263 /* Generate code for floating point ABS or NEG. */
/* Expand floating-point ABS or NEG (CODE) in MODE.  With SSE math the
   operation becomes a bitwise AND/XOR against a sign-bit mask; the x87
   path emits the plain unary rtx with USE/CLOBBER bookkeeping.  */
13266 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13269   rtx mask, set, use, clob, dst, src;
13270   bool use_sse = false;
13271   bool vector_mode = VECTOR_MODE_P (mode);
13272   enum machine_mode elt_mode = mode;
13276       elt_mode = GET_MODE_INNER (mode);
13279   else if (mode == TFmode)
13281   else if (TARGET_SSE_MATH)
13282     use_sse = SSE_FLOAT_MODE_P (mode);
13284   /* NEG and ABS performed with SSE use bitwise mask operations.
13285      Create the appropriate mask now. */
      /* ABS clears the sign bit (AND with inverted mask); NEG flips it
         (XOR with the mask) — see the XOR/AND selection below.  */
13287     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13296       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13297       set = gen_rtx_SET (VOIDmode, dst, set);
13302       set = gen_rtx_fmt_e (code, mode, src);
13303       set = gen_rtx_SET (VOIDmode, dst, set);
13306           use = gen_rtx_USE (VOIDmode, mask);
13307           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13308           emit_insn (gen_rtx_PARALLEL (VOIDmode,
13309                                        gen_rtvec (3, set, use, clob)))
13316 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand copysign (OPERANDS[0] = magnitude of OPERANDS[1] with sign of
   OPERANDS[2]).  A CONST_DOUBLE magnitude uses the _const pattern with
   a single mask; a variable magnitude uses the _var pattern with both a
   sign mask and its complement.  */
13319 ix86_expand_copysign (rtx operands[])
13321   enum machine_mode mode;
13322   rtx dest, op0, op1, mask, nmask;
13324   dest = operands[0];
13328   mode = GET_MODE (dest);
13330   if (GET_CODE (op0) == CONST_DOUBLE)
13332       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
      /* Normalize a negative constant magnitude to its absolute value;
         the sign comes entirely from op1.  */
13334       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13335         op0 = simplify_unary_operation (ABS, mode, op0, mode);
13337       if (mode == SFmode || mode == DFmode)
13339           enum machine_mode vmode;
13341           vmode = mode == SFmode ? V4SFmode : V2DFmode;
13343           if (op0 == CONST0_RTX (mode))
13344             op0 = CONST0_RTX (vmode);
13349               if (mode == SFmode)
13350                 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13351                                CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13353                 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13355               op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13358       else if (op0 != CONST0_RTX (mode))
13359         op0 = force_reg (mode, op0);
13361       mask = ix86_build_signbit_mask (mode, 0, 0);
13363       if (mode == SFmode)
13364         copysign_insn = gen_copysignsf3_const;
13365       else if (mode == DFmode)
13366         copysign_insn = gen_copysigndf3_const;
13368         copysign_insn = gen_copysigntf3_const;
13370       emit_insn (copysign_insn (dest, op0, op1, mask));
      /* Variable-magnitude case: needs both masks and a scratch.  */
13374       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13376       nmask = ix86_build_signbit_mask (mode, 0, 1);
13377       mask = ix86_build_signbit_mask (mode, 0, 0);
13379       if (mode == SFmode)
13380         copysign_insn = gen_copysignsf3_var;
13381       else if (mode == DFmode)
13382         copysign_insn = gen_copysigndf3_var;
13384         copysign_insn = gen_copysigntf3_var;
13386       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask))
13390 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13391 be a constant, and so has already been expanded into a vector constant. */
/* Split the copysign-with-constant pattern into bit operations:
   dest = (op1 & mask) | op0, where OPERANDS[1] (op0) is the constant
   magnitude already expanded as a vector and OPERANDS[3] is the sign
   mask.  */
13394 ix86_split_copysign_const (rtx operands[])
13396   enum machine_mode mode, vmode;
13397   rtx dest, op0, op1, mask, x;
13399   dest = operands[0];
13402   mask = operands[3];
13404   mode = GET_MODE (dest);
13405   vmode = GET_MODE (mask);
13407   dest = simplify_gen_subreg (vmode, dest, mode, 0);
13408   x = gen_rtx_AND (vmode, dest, mask);
13409   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
      /* IOR of a zero magnitude would be a no-op, so skip it.  */
13411   if (op0 != CONST0_RTX (vmode))
13413       x = gen_rtx_IOR (vmode, dest, op0);
13414       emit_insn (gen_rtx_SET (VOIDmode, dest, x))
13418 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13419 so we have to do two masks. */
/* Split the variable copysign pattern into bit masks:
   dest = (op0 & ~signmask) | (op1 & signmask).  OPERANDS are
   0 = dest, 1 = scratch, 2/3 = op0/op1, 4 = nmask (~sign), 5 = mask.
   Which register overlaps which (the insn alternatives) decides how the
   two AND halves are formed below.  */
13422 ix86_split_copysign_var (rtx operands[])
13424   enum machine_mode mode, vmode;
13425   rtx dest, scratch, op0, op1, mask, nmask, x;
13427   dest = operands[0];
13428   scratch = operands[1];
13431   nmask = operands[4];
13432   mask = operands[5];
13434   mode = GET_MODE (dest);
13435   vmode = GET_MODE (mask);
13437   if (rtx_equal_p (op0, op1))
13439       /* Shouldn't happen often (it's useless, obviously), but when it does
13440          we'd generate incorrect code if we continue below. */
13441       emit_move_insn (dest, op0);
13445   if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
13447       gcc_assert (REGNO (op1) == REGNO (scratch));
13449       x = gen_rtx_AND (vmode, scratch, mask);
13450       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
      /* dest holds the mask: use ANDN-style (~dest & op0).  */
13453       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13454       x = gen_rtx_NOT (vmode, dest);
13455       x = gen_rtx_AND (vmode, x, op0);
13456       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13460       if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
13462           x = gen_rtx_AND (vmode, scratch, mask);
13464       else                                              /* alternative 2,4 */
13466           gcc_assert (REGNO (mask) == REGNO (scratch));
13467           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13468           x = gen_rtx_AND (vmode, scratch, op1);
13470       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13472       if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
13474           dest = simplify_gen_subreg (vmode, op0, mode, 0);
13475           x = gen_rtx_AND (vmode, dest, nmask);
13477       else                                              /* alternative 3,4 */
13479           gcc_assert (REGNO (nmask) == REGNO (dest));
13481           op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13482           x = gen_rtx_AND (vmode, dest, op0);
13484       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
      /* Combine the two masked halves.  */
13487   x = gen_rtx_IOR (vmode, dest, scratch);
13488   emit_insn (gen_rtx_SET (VOIDmode, dest, x))
13491 /* Return TRUE or FALSE depending on whether the first SET in INSN
13492 has source and destination with matching CC modes, and that the
13493 CC mode is at least as constrained as REQ_MODE. */
/* Check that INSN's first SET is a COMPARE whose CC destination mode is
   at least as constrained as REQ_MODE, and that source and destination
   CC modes agree.  Used by insn predicates to validate flag usage.  */
13496 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13499   enum machine_mode set_mode;
13501   set = PATTERN (insn);
13502   if (GET_CODE (set) == PARALLEL)
13503     set = XVECEXP (set, 0, 0);
13504   gcc_assert (GET_CODE (set) == SET);
13505   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13507   set_mode = GET_MODE (SET_DEST (set));
      /* Dispatch on SET_MODE: each case decides which REQ_MODEs are
         acceptable for the mode actually used.  */
13511       if (req_mode != CCNOmode
13512           && (req_mode != CCmode
13513               || XEXP (SET_SRC (set), 1) != const0_rtx))
13517       if (req_mode == CCGCmode)
13521       if (req_mode == CCGOCmode || req_mode == CCNOmode)
13525       if (req_mode == CCZmode)
13536       gcc_unreachable ();
13539   return (GET_MODE (SET_SRC (set)) == set_mode)
13542 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0 and OP1 setting the flags register in
   the CC mode chosen by SELECT_CC_MODE, and return the (CODE flags 0)
   rtx the flags consumer (bcc/scc/cmov) should test.  */
13545 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13547   enum machine_mode cmpmode;
13550   cmpmode = SELECT_CC_MODE (code, op0, op1);
13551   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13553   /* This is very simple, but making the interface the same as in the
13554      FP case makes the rest of the code easier. */
13555   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13556   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13558   /* Return the test that should be put into the flags user, i.e.
13559      the bcc, scc, or cmov instruction. */
13560   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx)
13563 /* Figure out whether to use ordered or unordered fp comparisons.
13564 Return the appropriate mode to use. */
/* Choose the CC mode for a floating-point comparison: unordered
   (non-trapping) CCFPUmode under IEEE, plain CCFPmode otherwise.  */
13567 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13569   /* ??? In order to make all comparisons reversible, we do all comparisons
13570      non-trapping when compiling for IEEE. Once gcc is able to distinguish
13571      all forms trapping and nontrapping comparisons, we can make inequality
13572      comparisons trapping again, since it results in better code when using
13573      FCOM based compares. */
13574   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode
/* Select the weakest CC mode that still captures everything comparison
   CODE of OP0 and OP1 needs: fewer live flags give the combiner more
   freedom.  Per-case comments note which flags each code reads.  */
13578 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13580   enum machine_mode mode = GET_MODE (op0);
13582   if (SCALAR_FLOAT_MODE_P (mode))
13584       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13585       return ix86_fp_compare_mode (code);
13590       /* Only zero flag is needed. */
13591     case EQ:                    /* ZF=0 */
13592     case NE:                    /* ZF!=0 */
13594       /* Codes needing carry flag. */
13595     case GEU:                   /* CF=0 */
13596     case LTU:                   /* CF=1 */
13597       /* Detect overflow checks. They need just the carry flag. */
13598       if (GET_CODE (op0) == PLUS
13599           && rtx_equal_p (op1, XEXP (op0, 0)))
13603     case GTU:                   /* CF=0 & ZF=0 */
13604     case LEU:                   /* CF=1 | ZF=1 */
13605       /* Detect overflow checks. They need just the carry flag. */
13606       if (GET_CODE (op0) == MINUS
13607           && rtx_equal_p (op1, XEXP (op0, 0)))
13611       /* Codes possibly doable only with sign flag when
13612          comparing against zero. */
13613     case GE:                    /* SF=OF or SF=0 */
13614     case LT:                    /* SF<>OF or SF=1 */
13615       if (op1 == const0_rtx)
13618         /* For other cases Carry flag is not required. */
13620       /* Codes doable only with sign flag when comparing
13621          against zero, but we miss jump instruction for it
13622          so we need to use relational tests against overflow
13623          that thus needs to be zero. */
13624     case GT:                    /* ZF=0 & SF=OF */
13625     case LE:                    /* ZF=1 | SF<>OF */
13626       if (op1 == const0_rtx)
13630       /* strcmp pattern do (use flags) and combine may ask us for proper
13635       gcc_unreachable ()
13639 /* Return the fixed registers used for condition codes. */
/* Target hook: report the hard registers that hold condition codes
   via the P1/P2 out-parameters.  */
13642 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13649 /* If two condition code modes are compatible, return a condition code
13650 mode which is compatible with both. Otherwise, return
13653 static enum machine_mode
13654 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13659 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13662 if ((m1 == CCGCmode && m2 == CCGOCmode)
13663 || (m1 == CCGOCmode && m2 == CCGCmode))
13669 gcc_unreachable ();
13699 /* These are only compatible with themselves, which we already
13705 /* Split comparison code CODE into comparisons we can do using branch
13706 instructions. BYPASS_CODE is comparison code for branch that will
13707 branch around FIRST_CODE and SECOND_CODE. If some of branches
13708 is not required, set value to UNKNOWN.
13709 We never require more than two branches. */
/* Split FP comparison CODE into at most two branchable comparisons plus
   an optional BYPASS branch taken around them; unused outputs are set
   to UNKNOWN.  Without -mieee-fp the unordered fixups are dropped.  */
13712 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13713                           enum rtx_code *first_code,
13714                           enum rtx_code *second_code)
13716   *first_code = code;
13717   *bypass_code = UNKNOWN;
13718   *second_code = UNKNOWN;
13720   /* The fcomi comparison sets flags as follows:
      /* Codes the hardware flag patterns express directly.  */
13730     case GT:                    /* GTU - CF=0 & ZF=0 */
13731     case GE:                    /* GEU - CF=0 */
13732     case ORDERED:               /* PF=0 */
13733     case UNORDERED:             /* PF=1 */
13734     case UNEQ:                  /* EQ - ZF=1 */
13735     case UNLT:                  /* LTU - CF=1 */
13736     case UNLE:                  /* LEU - CF=1 | ZF=1 */
13737     case LTGT:                  /* EQ - ZF=0 */
13739     case LT:                    /* LTU - CF=1 - fails on unordered */
13740       *first_code = UNLT;
13741       *bypass_code = UNORDERED;
13743     case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
13744       *first_code = UNLE;
13745       *bypass_code = UNORDERED;
13747     case EQ:                    /* EQ - ZF=1 - fails on unordered */
13748       *first_code = UNEQ;
13749       *bypass_code = UNORDERED;
13751     case NE:                    /* NE - ZF=0 - fails on unordered */
13752       *first_code = LTGT;
13753       *second_code = UNORDERED;
13755     case UNGE:                  /* GEU - CF=0 - fails on unordered */
13757       *second_code = UNORDERED;
13759     case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
13761       *second_code = UNORDERED;
13764       gcc_unreachable ();
      /* NaN-correct fixups only matter under -mieee-fp.  */
13766   if (!TARGET_IEEE_FP)
13768       *second_code = UNKNOWN;
13769       *bypass_code = UNKNOWN
13773 /* Return cost of comparison done fcom + arithmetics operations on AX.
13774 All following functions do use number of instructions as a cost metrics.
13775 In future this should be tweaked to compute bytes for optimize_size and
13776 take into account performance of various instructions on various CPUs. */
13778 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* NOTE(review): the per-code cost switch (original lines 13781-13805) is
   elided in this excerpt.  */
13780 if (!TARGET_IEEE_FP)
13782 /* The cost of code output by ix86_expand_fp_compare. */
13806 gcc_unreachable ();
13810 /* Return cost of comparison done using fcomi operation.
13811 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13813 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13815 enum rtx_code bypass_code, first_code, second_code;
13816 /* Return arbitrarily high cost when instruction is not supported - this
13817 prevents gcc from using it. */
13820 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2, plus 1 when an extra bypass or second branch is needed.  */
13821 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13824 /* Return cost of comparison done using sahf operation.
13825 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13827 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13829 enum rtx_code bypass_code, first_code, second_code;
13830 /* Return arbitrarily high cost when instruction is not preferred - this
13831 avoids gcc from using it. */
/* Only worthwhile when sahf exists and is either fast or we optimize
   for size.  */
13832 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13834 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (one more than fcomi -- presumably for the fnstsw/sahf
   pair), plus 1 for an extra branch.  */
13835 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13838 /* Compute cost of the comparison done using any method.
13839 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13841 ix86_fp_comparison_cost (enum rtx_code code)
13843 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13846 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13847 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum over the three available strategies.  */
13849 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13850 if (min > sahf_cost)
13852 if (min > fcomi_cost)
13857 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE in either operand
   order (the swapped order counts because the caller may commute the
   comparison).  */
13861 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13863 enum rtx_code swapped_code = swap_condition (code);
13865 return ((ix86_fp_comparison_cost (code)
13866 == ix86_fp_comparison_fcomi_cost (code))
13867 || (ix86_fp_comparison_cost (swapped_code)
13868 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13871 /* Swap, force into registers, or otherwise massage the two operands
13872 to a fp comparison. The operands are updated in place; the new
13873 comparison code is returned. */
13875 static enum rtx_code
13876 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13878 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13879 rtx op0 = *pop0, op1 = *pop1;
13880 enum machine_mode op_mode = GET_MODE (op0);
13881 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13883 /* All of the unordered compare instructions only work on registers.
13884 The same is true of the fcomi compare instructions. The XFmode
13885 compare instructions require registers except when comparing
13886 against zero or when converting operand 1 from fixed point to
/* Force both operands into registers for unordered compares, fcomi, or
   XFmode compares against non-special constants.  */
13890 && (fpcmp_mode == CCFPUmode
13891 || (op_mode == XFmode
13892 && ! (standard_80387_constant_p (op0) == 1
13893 || standard_80387_constant_p (op1) == 1)
13894 && GET_CODE (op1) != FLOAT)
13895 || ix86_use_fcomi_compare (code)))
13897 op0 = force_reg (op_mode, op0);
13898 op1 = force_reg (op_mode, op1);
13902 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13903 things around if they appear profitable, otherwise force op0
13904 into a register. */
/* Swap when op0 is a non-loadable 387 constant but op1 is not, so the
   constant ends up as the second (memory-capable) operand.  */
13906 if (standard_80387_constant_p (op0) == 0
13908 && ! (standard_80387_constant_p (op1) == 0
13912 tmp = op0, op0 = op1, op1 = tmp;
13913 code = swap_condition (code);
13917 op0 = force_reg (op_mode, op0);
13919 if (CONSTANT_P (op1))
13921 int tmp = standard_80387_constant_p (op1);
/* Non-special constants go through the constant pool as a memory
   operand; otherwise load into a register.  */
13923 op1 = validize_mem (force_const_mem (op_mode, op1));
13927 op1 = force_reg (op_mode, op1);
13930 op1 = force_reg (op_mode, op1);
13934 /* Try to rearrange the comparison to make it cheaper. */
13935 if (ix86_fp_comparison_cost (code)
13936 > ix86_fp_comparison_cost (swap_condition (code))
13937 && (REG_P (op1) || can_create_pseudo_p ()))
13940 tmp = op0, op0 = op1, op1 = tmp;
13941 code = swap_condition (code);
13943 op0 = force_reg (op_mode, op0);
13951 /* Convert comparison codes we use to represent FP comparison to integer
13952 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the mapping switch (original lines 13957-13984) is elided
   in this excerpt.  */
13956 ix86_fp_compare_code_to_integer (enum rtx_code code)
13985 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emits the comparison and returns the flags-user rtx (for bcc/scc/cmov).
   *SECOND_TEST / *BYPASS_TEST receive auxiliary tests when two branches
   are needed; callers passing NULL force the arithmetic fallback.  */
13988 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13989 rtx *second_test, rtx *bypass_test)
13991 enum machine_mode fpcmp_mode, intcmp_mode;
13993 int cost = ix86_fp_comparison_cost (code);
13994 enum rtx_code bypass_code, first_code, second_code;
13996 fpcmp_mode = ix86_fp_compare_mode (code);
13997 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14000 *second_test = NULL_RTX;
14002 *bypass_test = NULL_RTX;
14004 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14006 /* Do fcomi/sahf based test when profitable. */
14007 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14008 && (bypass_code == UNKNOWN || bypass_test)
14009 && (second_code == UNKNOWN || second_test))
14011 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14012 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: compare clobbers a scratch HImode register for fnstsw.  */
14018 gcc_assert (TARGET_SAHF);
14021 scratch = gen_reg_rtx (HImode);
14022 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14024 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14027 /* The FP codes work out to act like unsigned. */
14028 intcmp_mode = fpcmp_mode;
14030 if (bypass_code != UNKNOWN)
14031 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14032 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14034 if (second_code != UNKNOWN)
14035 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14036 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14041 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14042 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14043 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14045 scratch = gen_reg_rtx (HImode);
14046 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14048 /* In the unordered case, we have to check C2 for NaN's, which
14049 doesn't happen to work out to anything nice combination-wise.
14050 So do some bit twiddling on the value we've got in AH to come
14051 up with an appropriate set of condition codes. */
/* The masks below (0x01/0x04/0x05/0x40/0x44/0x45) presumably select the
   x87 status-word condition bits C0/C2/C3 as seen in AH -- confirm
   against the x87 FSW layout.  */
14053 intcmp_mode = CCNOmode;
14058 if (code == GT || !TARGET_IEEE_FP)
14060 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14065 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14066 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14067 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14068 intcmp_mode = CCmode;
14074 if (code == LT && TARGET_IEEE_FP)
14076 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14077 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14078 intcmp_mode = CCmode;
14083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14089 if (code == GE || !TARGET_IEEE_FP)
14091 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14097 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14104 if (code == LE && TARGET_IEEE_FP)
14106 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14107 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14108 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14109 intcmp_mode = CCmode;
14114 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14120 if (code == EQ && TARGET_IEEE_FP)
14122 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14123 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14124 intcmp_mode = CCmode;
14129 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14136 if (code == NE && TARGET_IEEE_FP)
14138 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14139 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14145 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14151 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14155 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14160 gcc_unreachable ();
14164 /* Return the test that should be put into the flags user, i.e.
14165 the bcc, scc, or cmov instruction. */
14166 return gen_rtx_fmt_ee (code, VOIDmode,
14167 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 (or a previously
   emitted flags value) and return the flags-user rtx.  */
14172 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14175 op0 = ix86_compare_op0;
14176 op1 = ix86_compare_op1;
14179 *second_test = NULL_RTX;
14181 *bypass_test = NULL_RTX;
/* A compare insn was already emitted; just test its flags result and
   consume the saved value.  */
14183 if (ix86_compare_emitted)
14185 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14186 ix86_compare_emitted = NULL_RTX;
14188 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14190 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14191 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14192 second_test, bypass_test);
14195 ret = ix86_expand_int_compare (code, op0, op1);
14200 /* Return true if the CODE will result in nontrivial jump sequence. */
/* Nontrivial means the split needs a bypass or a second branch.  */
14202 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14204 enum rtx_code bypass_code, first_code, second_code;
14207 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14208 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE applied to the global comparison
   operands, jumping to LABEL.  Dispatches on the operand mode: simple
   integer/flag modes, x87/SSE float modes, and double-word (DImode on
   32-bit, DImode/TImode split) compares.  */
14212 ix86_expand_branch (enum rtx_code code, rtx label)
14216 /* If we have emitted a compare insn, go straight to simple.
14217 ix86_expand_compare won't emit anything if ix86_compare_emitted
14219 if (ix86_compare_emitted)
14222 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one conditional jump.  */
14228 tmp = ix86_expand_compare (code, NULL, NULL);
14229 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14230 gen_rtx_LABEL_REF (VOIDmode, label),
14232 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14241 enum rtx_code bypass_code, first_code, second_code;
14243 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14244 &ix86_compare_op1);
14246 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14248 /* Check whether we will use the natural sequence with one jump. If
14249 so, we can expand jump early. Otherwise delay expansion by
14250 creating compound insn to not confuse optimizers. */
14251 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14253 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14254 gen_rtx_LABEL_REF (VOIDmode, label),
14255 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch case: wrap the jump in a PARALLEL with the clobbers the
   later split will need (fpsr, flags, and a scratch unless fcomi).  */
14259 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14260 ix86_compare_op0, ix86_compare_op1);
14261 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14262 gen_rtx_LABEL_REF (VOIDmode, label),
14264 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14266 use_fcomi = ix86_use_fcomi_compare (code);
14267 vec = rtvec_alloc (3 + !use_fcomi);
14268 RTVEC_ELT (vec, 0) = tmp;
14270 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14272 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14275 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14277 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14286 /* Expand DImode branch into multiple compare+branch. */
14288 rtx lo[2], hi[2], label2;
14289 enum rtx_code code1, code2, code3;
14290 enum machine_mode submode;
/* Canonicalize: constant operand second.  */
14292 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14294 tmp = ix86_compare_op0;
14295 ix86_compare_op0 = ix86_compare_op1;
14296 ix86_compare_op1 = tmp;
14297 code = swap_condition (code);
14299 if (GET_MODE (ix86_compare_op0) == DImode)
14301 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14302 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14307 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14308 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14312 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14313 avoid two branches. This costs one extra insn, so disable when
14314 optimizing for size. */
14316 if ((code == EQ || code == NE)
14317 && (!optimize_insn_for_size_p ()
14318 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14323 if (hi[1] != const0_rtx)
14324 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14325 NULL_RTX, 0, OPTAB_WIDEN);
14328 if (lo[1] != const0_rtx)
14329 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14330 NULL_RTX, 0, OPTAB_WIDEN);
14332 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14333 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs against zero.  */
14335 ix86_compare_op0 = tmp;
14336 ix86_compare_op1 = const0_rtx;
14337 ix86_expand_branch (code, label);
14341 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14342 op1 is a constant and the low word is zero, then we can just
14343 examine the high word. Similarly for low word -1 and
14344 less-or-equal-than or greater-than. */
14346 if (CONST_INT_P (hi[1]))
14349 case LT: case LTU: case GE: case GEU:
14350 if (lo[1] == const0_rtx)
14352 ix86_compare_op0 = hi[0];
14353 ix86_compare_op1 = hi[1];
14354 ix86_expand_branch (code, label);
14358 case LE: case LEU: case GT: case GTU:
14359 if (lo[1] == constm1_rtx)
14361 ix86_compare_op0 = hi[0];
14362 ix86_compare_op1 = hi[1];
14363 ix86_expand_branch (code, label);
14371 /* Otherwise, we need two or three jumps. */
14373 label2 = gen_label_rtx ();
14376 code2 = swap_condition (code);
14377 code3 = unsigned_condition (code);
/* Pick the pair of high-word tests; low word is always compared
   unsigned (code3).  */
14381 case LT: case GT: case LTU: case GTU:
14384 case LE: code1 = LT; code2 = GT; break;
14385 case GE: code1 = GT; code2 = LT; break;
14386 case LEU: code1 = LTU; code2 = GTU; break;
14387 case GEU: code1 = GTU; code2 = LTU; break;
14389 case EQ: code1 = UNKNOWN; code2 = NE; break;
14390 case NE: code2 = UNKNOWN; break;
14393 gcc_unreachable ();
14398 * if (hi(a) < hi(b)) goto true;
14399 * if (hi(a) > hi(b)) goto false;
14400 * if (lo(a) < lo(b)) goto true;
14404 ix86_compare_op0 = hi[0];
14405 ix86_compare_op1 = hi[1];
14407 if (code1 != UNKNOWN)
14408 ix86_expand_branch (code1, label);
14409 if (code2 != UNKNOWN)
14410 ix86_expand_branch (code2, label2);
14412 ix86_compare_op0 = lo[0];
14413 ix86_compare_op1 = lo[1];
14414 ix86_expand_branch (code3, label);
14416 if (code2 != UNKNOWN)
14417 emit_label (label2);
14422 gcc_unreachable ();
14426 /* Split branch based on floating point condition. */
/* Emits up to three conditional jumps (bypass for unordered, the main
   test, and a second test), attaching REG_BR_PROB notes when the split
   pass supplied a probability.  */
14428 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14429 rtx target1, rtx target2, rtx tmp, rtx pushed)
14431 rtx second, bypass;
14432 rtx label = NULL_RTX;
14434 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so that target2 is the fall-through (pc_rtx).  */
14437 if (target2 != pc_rtx)
14440 code = reverse_condition_maybe_unordered (code);
14445 condition = ix86_expand_fp_compare (code, op1, op2,
14446 tmp, &second, &bypass);
14448 /* Remove pushed operand from stack. */
14450 ix86_free_from_memory (GET_MODE (pushed));
14452 if (split_branch_probability >= 0)
14454 /* Distribute the probabilities across the jumps.
14455 Assume the BYPASS and SECOND to be always test
14457 probability = split_branch_probability;
14459 /* Value of 1 is low enough to make no need for probability
14460 to be updated. Later we may run some experiments and see
14461 if unordered values are more frequent in practice. */
14463 bypass_probability = 1;
14465 second_probability = 1;
/* Bypass branch skips over the main conditional jump.  */
14467 if (bypass != NULL_RTX)
14469 label = gen_label_rtx ();
14470 i = emit_jump_insn (gen_rtx_SET
14472 gen_rtx_IF_THEN_ELSE (VOIDmode,
14474 gen_rtx_LABEL_REF (VOIDmode,
14477 if (bypass_probability >= 0)
14479 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14480 GEN_INT (bypass_probability),
/* The main conditional jump.  */
14483 i = emit_jump_insn (gen_rtx_SET
14485 gen_rtx_IF_THEN_ELSE (VOIDmode,
14486 condition, target1, target2)));
14487 if (probability >= 0)
14489 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14490 GEN_INT (probability),
14492 if (second != NULL_RTX)
14494 i = emit_jump_insn (gen_rtx_SET
14496 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14498 if (second_probability >= 0)
14500 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14501 GEN_INT (second_probability),
14504 if (label != NULL_RTX)
14505 emit_label (label);
/* Expand a setcc of CODE into QImode DEST.  Returns 0 (FAIL) or
   1 (DONE) in the expander convention.  */
14509 ix86_expand_setcc (enum rtx_code code, rtx dest)
14511 rtx ret, tmp, tmpreg, equiv;
14512 rtx second_test, bypass_test;
/* Double-word compares take a different path; punt here.  */
14514 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14515 return 0; /* FAIL */
14517 gcc_assert (GET_MODE (dest) == QImode);
14519 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14520 PUT_MODE (ret, QImode);
14525 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* When the FP compare needed two tests, combine the two setcc results
   with AND (bypass, reversed) or OR (second).  */
14526 if (bypass_test || second_test)
14528 rtx test = second_test;
14530 rtx tmp2 = gen_reg_rtx (QImode);
14533 gcc_assert (!second_test);
14534 test = bypass_test;
14536 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14538 PUT_MODE (test, QImode);
14539 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14542 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14544 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14547 /* Attach a REG_EQUAL note describing the comparison result. */
14548 if (ix86_compare_op0 && ix86_compare_op1)
14550 equiv = simplify_gen_relational (code, QImode,
14551 GET_MODE (ix86_compare_op0),
14552 ix86_compare_op0, ix86_compare_op1);
14553 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14556 return 1; /* DONE */
14559 /* Expand comparison setting or clearing carry flag. Return true when
14560 successful and set pop for the operation. */
14562 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14564 enum machine_mode mode =
14565 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14567 /* Do not handle DImode compares that go through special path. */
14568 if (mode == (TARGET_64BIT ? TImode : DImode))
14571 if (SCALAR_FLOAT_MODE_P (mode))
14573 rtx second_test = NULL, bypass_test = NULL;
14574 rtx compare_op, compare_seq;
14576 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14578 /* Shortcut: following common codes never translate
14579 into carry flag compares. */
14580 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14581 || code == ORDERED || code == UNORDERED)
14584 /* These comparisons require zero flag; swap operands so they won't. */
14585 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14586 && !TARGET_IEEE_FP)
14591 code = swap_condition (code);
14594 /* Try to expand the comparison and verify that we end up with
14595 carry flag based comparison. This fails to be true only when
14596 we decide to expand comparison using arithmetic that is not
14597 too common scenario. */
14599 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14600 &second_test, &bypass_test);
14601 compare_seq = get_insns ();
14604 if (second_test || bypass_test)
14607 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14608 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14609 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14611 code = GET_CODE (compare_op);
/* Only LTU/GEU read the carry flag directly.  */
14613 if (code != LTU && code != GEU)
14616 emit_insn (compare_seq);
14621 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 so the test becomes LTU or GEU.  */
14630 /* Convert a==0 into (unsigned)a<1. */
14633 if (op1 != const0_rtx)
14636 code = (code == EQ ? LTU : GEU);
14639 /* Convert a>b into b<a or a>=b-1. */
14642 if (CONST_INT_P (op1))
14644 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14645 /* Bail out on overflow. We still can swap operands but that
14646 would force loading of the constant into register. */
14647 if (op1 == const0_rtx
14648 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14650 code = (code == GTU ? GEU : LTU);
14657 code = (code == GTU ? LTU : GEU);
14661 /* Convert a>=0 into (unsigned)a<0x80000000. */
14664 if (mode == DImode || op1 != const0_rtx)
14666 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14667 code = (code == LT ? GEU : LTU);
14671 if (mode == DImode || op1 != constm1_rtx)
14673 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14674 code = (code == LE ? GEU : LTU);
14680 /* Swapping operands may cause constant to appear as first operand. */
14681 if (!nonimmediate_operand (op0, VOIDmode))
14683 if (!can_create_pseudo_p ())
14685 op0 = force_reg (mode, op0);
14687 ix86_compare_op0 = op0;
14688 ix86_compare_op1 = op1;
14689 *pop = ix86_expand_compare (code, NULL, NULL);
14690 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison) ? operands[2] : operands[3].  Returns 1 (DONE) or
   0 (FAIL) in the expander convention.  Tries branchless sequences
   (sbb/setcc arithmetic, lea) before falling back to cmov.  */
14695 ix86_expand_int_movcc (rtx operands[])
14697 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14698 rtx compare_seq, compare_op;
14699 rtx second_test, bypass_test;
14700 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below -- harmless, but worth
   cleaning up in the full source.  */
14701 bool sign_bit_compare_p = false;;
14704 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14705 compare_seq = get_insns ();
14708 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 / x>-1 / x<=-1 are sign-bit tests.  */
14710 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14711 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14712 sign_bit_compare_p = true;
14714 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14715 HImode insns, we'd be swallowed in word prefix ops. */
14717 if ((mode != HImode || TARGET_FAST_PREFIX)
14718 && (mode != (TARGET_64BIT ? TImode : DImode))
14719 && CONST_INT_P (operands[2])
14720 && CONST_INT_P (operands[3]))
14722 rtx out = operands[0];
14723 HOST_WIDE_INT ct = INTVAL (operands[2]);
14724 HOST_WIDE_INT cf = INTVAL (operands[3]);
14725 HOST_WIDE_INT diff;
14728 /* Sign bit compares are better done using shifts than we do by using
14730 if (sign_bit_compare_p
14731 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14732 ix86_compare_op1, &compare_op))
14734 /* Detect overlap between destination and compare sources. */
14737 if (!sign_bit_compare_p)
14739 bool fpcmp = false;
14741 compare_code = GET_CODE (compare_op);
14743 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14744 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14747 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14750 /* To simplify rest of code, restrict to the GEU case. */
14751 if (compare_code == LTU)
14753 HOST_WIDE_INT tmp = ct;
14756 compare_code = reverse_condition (compare_code);
14757 code = reverse_condition (code);
14762 PUT_CODE (compare_op,
14763 reverse_condition_maybe_unordered
14764 (GET_CODE (compare_op)));
14766 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14770 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14771 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14772 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb idiom).  */
14774 if (mode == DImode)
14775 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14777 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14781 if (code == GT || code == GE)
14782 code = reverse_condition (code);
14785 HOST_WIDE_INT tmp = ct;
14790 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14791 ix86_compare_op1, VOIDmode, 0, -1);
/* Combine the 0/-1 mask with ct/cf using add, or, not+add, or the
   general not+and+add sequence, depending on the constants.  */
14804 tmp = expand_simple_binop (mode, PLUS,
14806 copy_rtx (tmp), 1, OPTAB_DIRECT);
14817 tmp = expand_simple_binop (mode, IOR,
14819 copy_rtx (tmp), 1, OPTAB_DIRECT);
14821 else if (diff == -1 && ct)
14831 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14833 tmp = expand_simple_binop (mode, PLUS,
14834 copy_rtx (tmp), GEN_INT (cf),
14835 copy_rtx (tmp), 1, OPTAB_DIRECT);
14843 * andl cf - ct, dest
14853 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14856 tmp = expand_simple_binop (mode, AND,
14858 gen_int_mode (cf - ct, mode),
14859 copy_rtx (tmp), 1, OPTAB_DIRECT);
14861 tmp = expand_simple_binop (mode, PLUS,
14862 copy_rtx (tmp), GEN_INT (ct),
14863 copy_rtx (tmp), 1, OPTAB_DIRECT);
14866 if (!rtx_equal_p (tmp, out))
14867 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14869 return 1; /* DONE */
14874 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14877 tmp = ct, ct = cf, cf = tmp;
14880 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14882 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14884 /* We may be reversing unordered compare to normal compare, that
14885 is not valid in general (we may convert non-trapping condition
14886 to trapping one), however on i386 we currently emit all
14887 comparisons unordered. */
14888 compare_code = reverse_condition_maybe_unordered (compare_code);
14889 code = reverse_condition_maybe_unordered (code);
14893 compare_code = reverse_condition (compare_code);
14894 code = reverse_condition (code);
14898 compare_code = UNKNOWN;
14899 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14900 && CONST_INT_P (ix86_compare_op1))
14902 if (ix86_compare_op1 == const0_rtx
14903 && (code == LT || code == GE))
14904 compare_code = code;
14905 else if (ix86_compare_op1 == constm1_rtx)
14909 else if (code == GT)
14914 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14915 if (compare_code != UNKNOWN
14916 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14917 && (cf == -1 || ct == -1))
14919 /* If lea code below could be used, only optimize
14920 if it results in a 2 insn sequence. */
14922 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14923 || diff == 3 || diff == 5 || diff == 9)
14924 || (compare_code == LT && ct == -1)
14925 || (compare_code == GE && cf == -1))
14928 * notl op1 (if necessary)
14936 code = reverse_condition (code);
14939 out = emit_store_flag (out, code, ix86_compare_op0,
14940 ix86_compare_op1, VOIDmode, 0, -1);
14942 out = expand_simple_binop (mode, IOR,
14944 out, 1, OPTAB_DIRECT);
14945 if (out != operands[0])
14946 emit_move_insn (operands[0], out);
14948 return 1; /* DONE */
/* lea path: setcc produces 0/1, then scale/offset with lea-expressible
   diff (1,2,3,4,5,8,9).  */
14953 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14954 || diff == 3 || diff == 5 || diff == 9)
14955 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14957 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14963 * lea cf(dest*(ct-cf)),dest
14967 * This also catches the degenerate setcc-only case.
14973 out = emit_store_flag (out, code, ix86_compare_op0,
14974 ix86_compare_op1, VOIDmode, 0, 1);
14977 /* On x86_64 the lea instruction operates on Pmode, so we need
14978 to get arithmetics done in proper mode to match. */
14980 tmp = copy_rtx (out);
14984 out1 = copy_rtx (out);
14985 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14989 tmp = gen_rtx_PLUS (mode, tmp, out1);
14995 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14998 if (!rtx_equal_p (tmp, out))
15001 out = force_operand (tmp, copy_rtx (out));
15003 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15005 if (!rtx_equal_p (out, operands[0]))
15006 emit_move_insn (operands[0], copy_rtx (out));
15008 return 1; /* DONE */
15012 * General case: Jumpful:
15013 * xorl dest,dest cmpl op1, op2
15014 * cmpl op1, op2 movl ct, dest
15015 * setcc dest jcc 1f
15016 * decl dest movl cf, dest
15017 * andl (cf-ct),dest 1:
15020 * Size 20. Size 14.
15022 * This is reasonably steep, but branch mispredict costs are
15023 * high on modern cpus, so consider failing only if optimizing
15027 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15028 && BRANCH_COST (optimize_insn_for_speed_p (),
15033 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15038 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15040 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15042 /* We may be reversing unordered compare to normal compare,
15043 that is not valid in general (we may convert non-trapping
15044 condition to trapping one), however on i386 we currently
15045 emit all comparisons unordered. */
15046 code = reverse_condition_maybe_unordered (code);
15050 code = reverse_condition (code);
15051 if (compare_code != UNKNOWN)
15052 compare_code = reverse_condition (compare_code);
15056 if (compare_code != UNKNOWN)
15058 /* notl op1 (if needed)
15063 For x < 0 (resp. x <= -1) there will be no notl,
15064 so if possible swap the constants to get rid of the
15066 True/false will be -1/0 while code below (store flag
15067 followed by decrement) is 0/-1, so the constants need
15068 to be exchanged once more. */
15070 if (compare_code == GE || !cf)
15072 code = reverse_condition (code);
15077 HOST_WIDE_INT tmp = cf;
15082 out = emit_store_flag (out, code, ix86_compare_op0,
15083 ix86_compare_op1, VOIDmode, 0, -1);
15087 out = emit_store_flag (out, code, ix86_compare_op0,
15088 ix86_compare_op1, VOIDmode, 0, 1);
15090 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15091 copy_rtx (out), 1, OPTAB_DIRECT);
15094 out = expand_simple_binop (mode, AND, copy_rtx (out),
15095 gen_int_mode (cf - ct, mode),
15096 copy_rtx (out), 1, OPTAB_DIRECT);
15098 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15099 copy_rtx (out), 1, OPTAB_DIRECT);
15100 if (!rtx_equal_p (out, operands[0]))
15101 emit_move_insn (operands[0], copy_rtx (out));
15103 return 1; /* DONE */
15107 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15109 /* Try a few things more with specific constants and a variable. */
15112 rtx var, orig_out, out, tmp;
15114 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15115 return 0; /* FAIL */
15117 /* If one of the two operands is an interesting constant, load a
15118 constant with the above and mask it in with a logical operation. */
15120 if (CONST_INT_P (operands[2]))
15123 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15124 operands[3] = constm1_rtx, op = and_optab;
15125 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15126 operands[3] = const0_rtx, op = ior_optab;
15128 return 0; /* FAIL */
15130 else if (CONST_INT_P (operands[3]))
15133 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15134 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3] != const0_rtx, which
   is vacuously true when INTVAL (operands[3]) == -1; by symmetry with
   the branch above it presumably should test operands[2] -- verify
   against the upstream fix before changing.  */
15135 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15136 operands[2] = const0_rtx, op = ior_optab;
15138 return 0; /* FAIL */
15141 return 0; /* FAIL */
15143 orig_out = operands[0];
15144 tmp = gen_reg_rtx (mode);
15147 /* Recurse to get the constant loaded. */
15148 if (ix86_expand_int_movcc (operands) == 0)
15149 return 0; /* FAIL */
15151 /* Mask in the interesting variable. */
15152 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15154 if (!rtx_equal_p (out, orig_out))
15155 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15157 return 1; /* DONE */
15161 * For comparison with above,
/* Fall-through: emit a real cmov, forcing operands into registers as
   required and guarding against destination/source overlap when extra
   tests are present.  */
15171 if (! nonimmediate_operand (operands[2], mode))
15172 operands[2] = force_reg (mode, operands[2]);
15173 if (! nonimmediate_operand (operands[3], mode))
15174 operands[3] = force_reg (mode, operands[3]);
15176 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15178 rtx tmp = gen_reg_rtx (mode);
15179 emit_move_insn (tmp, operands[3]);
15182 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15184 rtx tmp = gen_reg_rtx (mode);
15185 emit_move_insn (tmp, operands[2]);
15189 if (! register_operand (operands[2], VOIDmode)
15191 || ! register_operand (operands[3], VOIDmode)))
15192 operands[2] = force_reg (mode, operands[2]);
15195 && ! register_operand (operands[3], VOIDmode))
15196 operands[3] = force_reg (mode, operands[3]);
15198 emit_insn (compare_seq);
15199 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15200 gen_rtx_IF_THEN_ELSE (mode,
15201 compare_op, operands[2],
15204 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15205 gen_rtx_IF_THEN_ELSE (mode,
15207 copy_rtx (operands[3]),
15208 copy_rtx (operands[0]))));
15210 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15211 gen_rtx_IF_THEN_ELSE (mode,
15213 copy_rtx (operands[2]),
15214 copy_rtx (operands[0]))));
15216 return 1; /* DONE */
15219 /* Swap, force into registers, or otherwise massage the two operands
15220 to an sse comparison with a mask result. Thus we differ a bit from
15221 ix86_prepare_fp_compare_args which expects to produce a flags result.
15223 The DEST operand exists to help determine whether to commute commutative
15224 operators. The POP0/POP1 operands are updated in place. The new
15225 comparison code is returned, or UNKNOWN if not implementable. */
15227 static enum rtx_code
15228 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15229 rtx *pop0, rtx *pop1)
15237 /* We have no LTGT as an operator. We could implement it with
15238 NE & ORDERED, but this requires an extra temporary. It's
15239 not clear that it's worth it. */
15246 /* These are supported directly. */
15253 /* For commutative operators, try to canonicalize the destination
15254 operand to be first in the comparison - this helps reload to
15255 avoid extra moves. */
15256 if (!dest || !rtx_equal_p (dest, *pop1))
15264 /* These are not supported directly. Swap the comparison operands
15265 to transform into something that is supported. */
/* NOTE(review): presumably *pop0/*pop1 are exchanged alongside the
   condition swap below -- confirm against the full source.  */
15269 code = swap_condition (code);
15273 gcc_unreachable ();
15279 /* Detect conditional moves that exactly match min/max operational
15280 semantics. Note that this is IEEE safe, as long as we don't
15281 interchange the operands.
15283 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15284 and TRUE if the operation is successful and instructions are emitted. */
15287 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15288 rtx cmp_op1, rtx if_true, rtx if_false)
/* DEST = (CMP_OP0 CODE CMP_OP1) ? IF_TRUE : IF_FALSE, emitted as a single
   min/max when the arms match the comparison operands.  */
15290 enum machine_mode mode;
15296 else if (code == UNGE)
15299 if_true = if_false;
15305 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15307 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15312 mode = GET_MODE (dest);
15314 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15315 but MODE may be a vector mode and thus not appropriate. */
15316 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15318 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
/* Strict-IEEE path: wrap the operation in an UNSPEC so the combiner
   cannot commute the operands.  */
15321 if_true = force_reg (mode, if_true);
15322 v = gen_rtvec (2, if_true, if_false);
15323 tmp = gen_rtx_UNSPEC (mode, v, u);
15327 code = is_min ? SMIN : SMAX;
15328 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15331 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15335 /* Expand an sse vector comparison. Return the register with the result. */
15338 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15339 rtx op_true, rtx op_false)
/* CMP_OP0 is always forced into a register; CMP_OP1 may remain a
   nonimmediate operand.  A fresh DEST is allocated if it overlaps
   either value arm.  */
15341 enum machine_mode mode = GET_MODE (dest);
15344 cmp_op0 = force_reg (mode, cmp_op0);
15345 if (!nonimmediate_operand (cmp_op1, mode))
15346 cmp_op1 = force_reg (mode, cmp_op1);
15349 || reg_overlap_mentioned_p (dest, op_true)
15350 || reg_overlap_mentioned_p (dest, op_false))
15351 dest = gen_reg_rtx (mode);
15353 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15354 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15359 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15360 operations. This is used for both scalar and vector conditional moves. */
15363 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15365 enum machine_mode mode = GET_MODE (dest);
/* Special-case an all-zero arm: a single AND (or NOT+AND) suffices.  */
15368 if (op_false == CONST0_RTX (mode))
15370 op_true = force_reg (mode, op_true);
15371 x = gen_rtx_AND (mode, cmp, op_true);
15372 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15374 else if (op_true == CONST0_RTX (mode))
15376 op_false = force_reg (mode, op_false);
15377 x = gen_rtx_NOT (mode, cmp);
15378 x = gen_rtx_AND (mode, x, op_false);
15379 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15381 else if (TARGET_SSE5)
/* SSE5 has a real conditional-move (pcmov) pattern.  */
15383 rtx pcmov = gen_rtx_SET (mode, dest,
15384 gen_rtx_IF_THEN_ELSE (mode, cmp,
15391 op_true = force_reg (mode, op_true);
15392 op_false = force_reg (mode, op_false);
15394 t2 = gen_reg_rtx (mode);
15396 t3 = gen_reg_rtx (mode);
/* General case: (op_true & cmp) | (op_false & ~cmp).  */
15400 x = gen_rtx_AND (mode, op_true, cmp);
15401 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15403 x = gen_rtx_NOT (mode, cmp);
15404 x = gen_rtx_AND (mode, x, op_false);
15405 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15407 x = gen_rtx_IOR (mode, t3, t2);
15408 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15412 /* Expand a floating-point conditional move. Return true if successful. */
15415 ix86_expand_fp_movcc (rtx operands[])
/* operands[1] holds the comparison rtx; operands[2]/operands[3] are the
   two value arms.  The comparison inputs come from the global
   ix86_compare_op0/ix86_compare_op1.  */
15417 enum machine_mode mode = GET_MODE (operands[0]);
15418 enum rtx_code code = GET_CODE (operands[1]);
15419 rtx tmp, compare_op, second_test, bypass_test;
15421 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15423 enum machine_mode cmode;
15425 /* Since we've no cmove for sse registers, don't force bad register
15426 allocation just to gain access to it. Deny movcc when the
15427 comparison mode doesn't match the move mode. */
15428 cmode = GET_MODE (ix86_compare_op0);
15429 if (cmode == VOIDmode)
15430 cmode = GET_MODE (ix86_compare_op1);
15434 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15436 &ix86_compare_op1);
15437 if (code == UNKNOWN)
15440 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15441 ix86_compare_op1, operands[2],
15445 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15446 ix86_compare_op1, operands[2], operands[3]);
15447 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15451 /* The floating point conditional move instructions don't directly
15452 support conditions resulting from a signed integer comparison. */
15454 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15456 /* The floating point conditional move instructions don't directly
15457 support signed integer comparisons. */
15459 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition via setcc into a QImode register, then
   redo the comparison against zero so fcmov can consume it.  */
15461 gcc_assert (!second_test && !bypass_test);
15462 tmp = gen_reg_rtx (QImode);
15463 ix86_expand_setcc (code, tmp);
15465 ix86_compare_op0 = tmp;
15466 ix86_compare_op1 = const0_rtx;
15467 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15469 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15471 tmp = gen_reg_rtx (mode);
15472 emit_move_insn (tmp, operands[3]);
15475 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15477 tmp = gen_reg_rtx (mode);
15478 emit_move_insn (tmp, operands[2]);
15482 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15483 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15484 operands[2], operands[3])));
15486 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15487 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15488 operands[3], operands[0])));
15490 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15491 gen_rtx_IF_THEN_ELSE (mode, second_test,
15492 operands[2], operands[0])));
15497 /* Expand a floating-point vector conditional move; a vcond operation
15498 rather than a movcc operation. */
15501 ix86_expand_fp_vcond (rtx operands[])
/* operands[3] is the comparison, operands[4]/[5] its arguments,
   operands[1]/[2] the value arms; operands[0] receives the result.  */
15503 enum rtx_code code = GET_CODE (operands[3]);
15506 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15507 &operands[4], &operands[5]);
15508 if (code == UNKNOWN)
15511 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15512 operands[5], operands[1], operands[2]))
15515 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15516 operands[1], operands[2]);
15517 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15521 /* Expand a signed/unsigned integral vector conditional move. */
15524 ix86_expand_int_vcond (rtx operands[])
15526 enum machine_mode mode = GET_MODE (operands[0]);
15527 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE records that the value arms must be exchanged after
   canonicalization -- see operands[1+negate]/operands[2-negate] below.  */
15528 bool negate = false;
15531 cop0 = operands[4];
15532 cop1 = operands[5];
15534 /* SSE5 supports all of the comparisons on all vector int types. */
15537 /* Canonicalize the comparison to EQ, GT, GTU. */
15548 code = reverse_condition (code);
15554 code = reverse_condition (code);
15560 code = swap_condition (code);
15561 x = cop0, cop0 = cop1, cop1 = x;
15565 gcc_unreachable ();
15568 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15569 if (mode == V2DImode)
15574 /* SSE4.1 supports EQ. */
15575 if (!TARGET_SSE4_1)
15581 /* SSE4.2 supports GT/GTU. */
15582 if (!TARGET_SSE4_2)
15587 gcc_unreachable ();
15591 /* Unsigned parallel compare is not supported by the hardware. Play some
15592 tricks to turn this into a signed comparison against 0. */
15595 cop0 = force_reg (mode, cop0);
15604 /* Perform a parallel modulo subtraction. */
15605 t1 = gen_reg_rtx (mode);
15606 emit_insn ((mode == V4SImode
15608 : gen_subv2di3) (t1, cop0, cop1));
15610 /* Extract the original sign bit of op0. */
15611 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15613 t2 = gen_reg_rtx (mode);
15614 emit_insn ((mode == V4SImode
15616 : gen_andv2di3) (t2, cop0, mask));
15618 /* XOR it back into the result of the subtraction. This results
15619 in the sign bit set iff we saw unsigned underflow. */
15620 x = gen_reg_rtx (mode);
15621 emit_insn ((mode == V4SImode
15623 : gen_xorv2di3) (x, t1, t2));
15631 /* Perform a parallel unsigned saturating subtraction. */
15632 x = gen_reg_rtx (mode);
15633 emit_insn (gen_rtx_SET (VOIDmode, x,
15634 gen_rtx_US_MINUS (mode, cop0, cop1)));
15641 gcc_unreachable ();
15645 cop1 = CONST0_RTX (mode);
15649 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15650 operands[1+negate], operands[2-negate]);
15652 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15653 operands[2-negate]);
15657 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15658 true if we should do zero extension, else sign extension. HIGH_P is
15659 true if we want the N/2 high elements, else the low elements. */
15662 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15664 enum machine_mode imode = GET_MODE (operands[1]);
15665 rtx (*unpack)(rtx, rtx, rtx);
15672 unpack = gen_vec_interleave_highv16qi;
15674 unpack = gen_vec_interleave_lowv16qi;
15678 unpack = gen_vec_interleave_highv8hi;
15680 unpack = gen_vec_interleave_lowv8hi;
15684 unpack = gen_vec_interleave_highv4si;
15686 unpack = gen_vec_interleave_lowv4si;
15689 gcc_unreachable ();
15692 dest = gen_lowpart (imode, operands[0]);
/* SE is the vector interleaved with the source: zero for zero-extension,
   otherwise a GT-compare mask that replicates each element's sign.  */
15695 se = force_reg (imode, CONST0_RTX (imode));
15697 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15698 operands[1], pc_rtx, pc_rtx);
15700 emit_insn (unpack (dest, operands[1], se));
15703 /* This function performs the same task as ix86_expand_sse_unpack,
15704 but with SSE4.1 instructions. */
15707 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
/* Uses the pmovzx/pmovsx-style extend patterns; for the high half the
   source is first shifted right by 8 bytes.  */
15709 enum machine_mode imode = GET_MODE (operands[1]);
15710 rtx (*unpack)(rtx, rtx);
15717 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15719 unpack = gen_sse4_1_extendv8qiv8hi2;
15723 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15725 unpack = gen_sse4_1_extendv4hiv4si2;
15729 unpack = gen_sse4_1_zero_extendv2siv2di2;
15731 unpack = gen_sse4_1_extendv2siv2di2;
15734 gcc_unreachable ();
15737 dest = operands[0];
15740 /* Shift higher 8 bytes to lower 8 bytes. */
15741 src = gen_reg_rtx (imode);
15742 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15743 gen_lowpart (TImode, operands[1]),
15749 emit_insn (unpack (dest, src));
15752 /* This function performs the same task as ix86_expand_sse_unpack,
15753 but with sse5 instructions. */
15756 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
/* Builds a PPERM byte-selector vector that widens each element by
   interleaving source bytes with zero (unsigned) or sign (signed)
   bytes; H biases the selectors toward the high half of the source.  */
15758 enum machine_mode imode = GET_MODE (operands[1]);
15759 int pperm_bytes[16];
15761 int h = (high_p) ? 8 : 0;
15764 rtvec v = rtvec_alloc (16);
15767 rtx op0 = operands[0], op1 = operands[1];
15772 vs = rtvec_alloc (8);
15773 h2 = (high_p) ? 8 : 0;
15774 for (i = 0; i < 8; i++)
15776 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15777 pperm_bytes[2*i+1] = ((unsigned_p)
15779 : PPERM_SIGN | PPERM_SRC2 | i | h);
15782 for (i = 0; i < 16; i++)
15783 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15785 for (i = 0; i < 8; i++)
15786 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15788 p = gen_rtx_PARALLEL (VOIDmode, vs);
15789 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15791 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15793 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15797 vs = rtvec_alloc (4);
15798 h2 = (high_p) ? 4 : 0;
15799 for (i = 0; i < 4; i++)
15801 sign_extend = ((unsigned_p)
15803 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15804 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15805 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15806 pperm_bytes[4*i+2] = sign_extend;
15807 pperm_bytes[4*i+3] = sign_extend;
15810 for (i = 0; i < 16; i++)
15811 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15813 for (i = 0; i < 4; i++)
15814 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15816 p = gen_rtx_PARALLEL (VOIDmode, vs);
15817 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15819 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15821 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15825 vs = rtvec_alloc (2);
15826 h2 = (high_p) ? 2 : 0;
15827 for (i = 0; i < 2; i++)
15829 sign_extend = ((unsigned_p)
15831 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15832 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15833 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15834 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15835 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15836 pperm_bytes[8*i+4] = sign_extend;
15837 pperm_bytes[8*i+5] = sign_extend;
15838 pperm_bytes[8*i+6] = sign_extend;
15839 pperm_bytes[8*i+7] = sign_extend;
15842 for (i = 0; i < 16; i++)
15843 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15845 for (i = 0; i < 2; i++)
15846 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15848 p = gen_rtx_PARALLEL (VOIDmode, vs);
15849 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15851 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15853 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15857 gcc_unreachable ();
15863 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15864 next narrower integer vector type */
15866 ix86_expand_sse5_pack (rtx operands[3])
/* Builds a PPERM selector taking the low bytes of each element of
   OPERANDS[1] (first 8 selector bytes) and OPERANDS[2] (last 8).  */
15868 enum machine_mode imode = GET_MODE (operands[0]);
15869 int pperm_bytes[16];
15871 rtvec v = rtvec_alloc (16);
15873 rtx op0 = operands[0];
15874 rtx op1 = operands[1];
15875 rtx op2 = operands[2];
15880 for (i = 0; i < 8; i++)
15882 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15883 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15886 for (i = 0; i < 16; i++)
15887 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15889 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15890 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
15894 for (i = 0; i < 4; i++)
15896 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15897 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15898 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15899 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15902 for (i = 0; i < 16; i++)
15903 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15905 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15906 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
15910 for (i = 0; i < 2; i++)
15912 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15913 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15914 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15915 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15916 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15917 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15918 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15919 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15922 for (i = 0; i < 16; i++)
15923 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15925 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15926 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15930 gcc_unreachable ();
15936 /* Expand conditional increment or decrement using adc/sbb instructions.
15937 The default case using setcc followed by the conditional move can be
15938 done by generic code. */
15940 ix86_expand_int_addcc (rtx operands[])
/* operands[0] = operands[2] +/- 1 depending on the comparison in
   operands[1]; operands[3] must be +1 or -1.  Returns 1 (DONE) when
   the adc/sbb form applies, presumably 0 otherwise (early exits are
   not fully visible here).  */
15942 enum rtx_code code = GET_CODE (operands[1]);
15944 rtx val = const0_rtx;
15945 bool fpcmp = false;
15946 enum machine_mode mode = GET_MODE (operands[0]);
15948 if (operands[3] != const1_rtx
15949 && operands[3] != constm1_rtx)
15951 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15952 ix86_compare_op1, &compare_op))
15954 code = GET_CODE (compare_op);
15956 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15957 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15960 code = ix86_fp_compare_code_to_integer (code);
15967 PUT_CODE (compare_op,
15968 reverse_condition_maybe_unordered
15969 (GET_CODE (compare_op)));
15971 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15973 PUT_MODE (compare_op, mode);
15975 /* Construct either adc or sbb insn. */
15976 if ((code == LTU) == (operands[3] == constm1_rtx))
15978 switch (GET_MODE (operands[0]))
15981 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15984 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15987 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15990 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15993 gcc_unreachable ();
15998 switch (GET_MODE (operands[0]))
16001 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16004 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16007 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16010 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16013 gcc_unreachable ();
16016 return 1; /* DONE */
16020 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16021 works for floating point parameters and nonoffsetable memories.
16022 For pushes, it returns just stack offsets; the values will be saved
16023 in the right order. Maximally three parts are generated. */
/* NOTE(review): the assert below permits up to FOUR parts (e.g. TImode /
   TFmode on 32-bit); the "three parts" above looks stale -- confirm.  */
16026 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16031 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16033 size = (GET_MODE_SIZE (mode) + 4) / 8;
16035 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16036 gcc_assert (size >= 2 && size <= 4);
16038 /* Optimize constant pool reference to immediates. This is used by fp
16039 moves, that force all constants to memory to allow combining. */
16040 if (MEM_P (operand) && MEM_READONLY_P (operand))
16042 rtx tmp = maybe_get_pool_constant (operand);
16047 if (MEM_P (operand) && !offsettable_memref_p (operand))
16049 /* The only non-offsetable memories we handle are pushes. */
16050 int ok = push_operand (operand, VOIDmode);
16054 operand = copy_rtx (operand);
16055 PUT_MODE (operand, Pmode);
16056 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16060 if (GET_CODE (operand) == CONST_VECTOR)
16062 enum machine_mode imode = int_mode_for_mode (mode);
16063 /* Caution: if we looked through a constant pool memory above,
16064 the operand may actually have a different mode now. That's
16065 ok, since we want to pun this all the way back to an integer. */
16066 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16067 gcc_assert (operand != NULL);
16073 if (mode == DImode)
16074 split_di (&operand, 1, &parts[0], &parts[1]);
16079 if (REG_P (operand))
16081 gcc_assert (reload_completed);
16082 for (i = 0; i < size; i++)
16083 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i)
16085 else if (offsettable_memref_p (operand))
16087 operand = adjust_address (operand, SImode, 0);
16088 parts[0] = operand;
16089 for (i = 1; i < size; i++)
16090 parts[i] = adjust_address (operand, SImode, 4 * i);
16092 else if (GET_CODE (operand) == CONST_DOUBLE)
16097 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16101 real_to_target (l, &r, mode);
16102 parts[3] = gen_int_mode (l[3], SImode);
16103 parts[2] = gen_int_mode (l[2], SImode);
16106 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16107 parts[2] = gen_int_mode (l[2], SImode);
16110 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16113 gcc_unreachable ();
16115 parts[1] = gen_int_mode (l[1], SImode);
16116 parts[0] = gen_int_mode (l[0], SImode);
16119 gcc_unreachable ();
16124 if (mode == TImode)
16125 split_ti (&operand, 1, &parts[0], &parts[1]);
16126 if (mode == XFmode || mode == TFmode)
16128 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16129 if (REG_P (operand))
16131 gcc_assert (reload_completed);
16132 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16133 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16135 else if (offsettable_memref_p (operand))
16137 operand = adjust_address (operand, DImode, 0);
16138 parts[0] = operand;
16139 parts[1] = adjust_address (operand, upper_mode, 8);
16141 else if (GET_CODE (operand) == CONST_DOUBLE)
16146 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16147 real_to_target (l, &r, mode);
16149 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16150 if (HOST_BITS_PER_WIDE_INT >= 64)
16153 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16154 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16157 parts[0] = immed_double_const (l[0], l[1], DImode);
16159 if (upper_mode == SImode)
16160 parts[1] = gen_int_mode (l[2], SImode);
16161 else if (HOST_BITS_PER_WIDE_INT >= 64)
16164 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16165 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16168 parts[1] = immed_double_const (l[2], l[3], DImode);
16171 gcc_unreachable ();
16178 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16179 Return false when normal moves are needed; true when all required
16180 insns have been emitted. Operands 2-4 contain the input values
16181 in the correct order; operands 5-7 contain the output values. */
16184 ix86_split_long_move (rtx operands[])
16189 int collisions = 0;
16190 enum machine_mode mode = GET_MODE (operands[0]);
16191 bool collisionparts[4];
16193 /* The DFmode expanders may ask us to move double.
16194 For 64bit target this is single move. By hiding the fact
16195 here we simplify i386.md splitters. */
16196 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16198 /* Optimize constant pool reference to immediates. This is used by
16199 fp moves, that force all constants to memory to allow combining. */
16201 if (MEM_P (operands[1])
16202 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16203 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16204 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16205 if (push_operand (operands[0], VOIDmode))
16207 operands[0] = copy_rtx (operands[0]);
16208 PUT_MODE (operands[0], Pmode);
16211 operands[0] = gen_lowpart (DImode, operands[0]);
16212 operands[1] = gen_lowpart (DImode, operands[1]);
16213 emit_move_insn (operands[0], operands[1]);
16217 /* The only non-offsettable memory we handle is push. */
16218 if (push_operand (operands[0], VOIDmode))
16221 gcc_assert (!MEM_P (operands[0])
16222 || offsettable_memref_p (operands[0]));
16224 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16225 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16227 /* When emitting push, take care for source operands on the stack. */
16228 if (push && MEM_P (operands[1])
16229 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16230 for (i = 0; i < nparts - 1; i++)
16231 part[1][i] = change_address (part[1][i],
16232 GET_MODE (part[1][i]),
16233 XEXP (part[1][i + 1], 0));
16235 /* We need to do copy in the right order in case an address register
16236 of the source overlaps the destination. */
16237 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16241 for (i = 0; i < nparts; i++)
16244 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16245 if (collisionparts[i])
16249 /* Collision in the middle part can be handled by reordering. */
16250 if (collisions == 1 && nparts == 3 && collisionparts [1])
16252 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16253 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16255 else if (collisions == 1
16257 && (collisionparts [1] || collisionparts [2]))
16259 if (collisionparts [1])
16261 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16262 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16266 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16267 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16271 /* If there are more collisions, we can't handle it by reordering.
16272 Do an lea to the last part and use only one colliding move. */
16273 else if (collisions > 1)
16279 base = part[0][nparts - 1];
16281 /* Handle the case when the last part isn't valid for lea.
16282 Happens in 64-bit mode storing the 12-byte XFmode. */
16283 if (GET_MODE (base) != Pmode)
16284 base = gen_rtx_REG (Pmode, REGNO (base));
16286 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16287 part[1][0] = replace_equiv_address (part[1][0], base);
16288 for (i = 1; i < nparts; i++)
16290 tmp = plus_constant (base, UNITS_PER_WORD * i);
16291 part[1][i] = replace_equiv_address (part[1][i], tmp);
16302 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16303 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16304 emit_move_insn (part[0][2], part[1][2]);
16306 else if (nparts == 4)
16308 emit_move_insn (part[0][3], part[1][3]);
16309 emit_move_insn (part[0][2], part[1][2]);
16314 /* In 64bit mode we don't have 32bit push available. In case this is
16315 register, it is OK - we will just use larger counterpart. We also
16316 retype memory - these comes from attempt to avoid REX prefix on
16317 moving of second half of TFmode value. */
16318 if (GET_MODE (part[1][1]) == SImode)
16320 switch (GET_CODE (part[1][1]))
16323 part[1][1] = adjust_address (part[1][1], DImode, 0);
16327 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16331 gcc_unreachable ();
16334 if (GET_MODE (part[1][0]) == SImode)
16335 part[1][0] = part[1][1];
16338 emit_move_insn (part[0][1], part[1][1]);
16339 emit_move_insn (part[0][0], part[1][0]);
16343 /* Choose correct order to not overwrite the source before it is copied. */
16344 if ((REG_P (part[0][0])
16345 && REG_P (part[1][1])
16346 && (REGNO (part[0][0]) == REGNO (part[1][1])
16348 && REGNO (part[0][0]) == REGNO (part[1][2]))
16350 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16352 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low when the first destination part would clobber a
   later source part; otherwise low-to-high.  */
16354 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16356 operands[2 + i] = part[0][j];
16357 operands[6 + i] = part[1][j];
16362 for (i = 0; i < nparts; i++)
16364 operands[2 + i] = part[0][i];
16365 operands[6 + i] = part[1][i];
16369 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16370 if (optimize_insn_for_size_p ())
16372 for (j = 0; j < nparts - 1; j++)
16373 if (CONST_INT_P (operands[6 + j])
16374 && operands[6 + j] != const0_rtx
16375 && REG_P (operands[2 + j]))
16376 for (i = j; i < nparts - 1; i++)
16377 if (CONST_INT_P (operands[7 + i])
16378 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16379 operands[7 + i] = operands[2 + j];
16382 for (i = 0; i < nparts; i++)
16383 emit_move_insn (operands[2 + i], operands[6 + i]);
16388 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16389 left shift by a constant, either using a single shift or
16390 a sequence of add instructions. */
/* COUNT == 1 becomes a single self-add; small counts become an add chain
   when cheaper than a constant shift; otherwise one shift insn.  */
16393 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16397 emit_insn ((mode == DImode
16399 : gen_adddi3) (operand, operand, operand));
16401 else if (!optimize_insn_for_size_p ()
16402 && count * ix86_cost->add <= ix86_cost->shift_const)
16405 for (i=0; i<count; i++)
16407 emit_insn ((mode == DImode
16409 : gen_adddi3) (operand, operand, operand));
16413 emit_insn ((mode == DImode
16415 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word (DImode on 32-bit, TImode on 64-bit) left shift
   into word-sized operations.  SCRATCH, when available with CMOVE, is
   used by the shift-adjust pattern for variable counts.  */
16419 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16421 rtx low[2], high[2];
16423 const int single_width = mode == DImode ? 32 : 64;
16425 if (CONST_INT_P (operands[2]))
16427 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16428 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16430 if (count >= single_width)
16432 emit_move_insn (high[0], low[1]);
16433 emit_move_insn (low[0], const0_rtx);
16435 if (count > single_width)
16436 ix86_expand_ashl_const (high[0], count - single_width, mode);
16440 if (!rtx_equal_p (operands[0], operands[1]))
16441 emit_move_insn (operands[0], operands[1]);
16442 emit_insn ((mode == DImode
16444 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16445 ix86_expand_ashl_const (low[0], count, mode);
16450 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16452 if (operands[1] == const1_rtx)
16454 /* Assuming we've chosen a QImode capable registers, then 1 << N
16455 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16456 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16458 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16460 ix86_expand_clear (low[0]);
16461 ix86_expand_clear (high[0]);
16462 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16464 d = gen_lowpart (QImode, low[0]);
16465 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16466 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16467 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16469 d = gen_lowpart (QImode, high[0]);
16470 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16471 s = gen_rtx_NE (QImode, flags, const0_rtx);
16472 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16475 /* Otherwise, we can get the same results by manually performing
16476 a bit extract operation on bit 5/6, and then performing the two
16477 shifts. The two methods of getting 0/1 into low/high are exactly
16478 the same size. Avoiding the shift in the bit extract case helps
16479 pentium4 a bit; no one else seems to care much either way. */
16484 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16485 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16487 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16488 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16490 emit_insn ((mode == DImode
16492 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16493 emit_insn ((mode == DImode
16495 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16496 emit_move_insn (low[0], high[0]);
16497 emit_insn ((mode == DImode
16499 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16502 emit_insn ((mode == DImode
16504 : gen_ashldi3) (low[0], low[0], operands[2]));
16505 emit_insn ((mode == DImode
16507 : gen_ashldi3) (high[0], high[0], operands[2]));
16511 if (operands[1] == constm1_rtx)
16513 /* For -1 << N, we can avoid the shld instruction, because we
16514 know that we're shifting 0...31/63 ones into a -1. */
16515 emit_move_insn (low[0], constm1_rtx)
16516 if (optimize_insn_for_size_p ())
16517 emit_move_insn (high[0], low[0]);
16519 emit_move_insn (high[0], constm1_rtx);
16523 if (!rtx_equal_p (operands[0], operands[1]))
16524 emit_move_insn (operands[0], operands[1]);
16526 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16527 emit_insn ((mode == DImode
16529 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16532 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up for counts >= single_width: with CMOVE use the branchless
   shift-adjust pattern via SCRATCH, else the branching variant.  */
16534 if (TARGET_CMOVE && scratch)
16536 ix86_expand_clear (scratch);
16537 emit_insn ((mode == DImode
16538 ? gen_x86_shift_adj_1
16539 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16543 emit_insn ((mode == DImode
16544 ? gen_x86_shift_adj_2
16545 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift (a DImode register pair in
   32-bit mode, a TImode pair in 64-bit mode) into operations on the two
   single-word halves.  OPERANDS are the shift pattern's operands (dest,
   source, count), SCRATCH an optional scratch register used for the
   branchless variable-count fixup, MODE the overall double-word mode.
   NOTE(review): the embedded line numbers in this listing skip values, so
   braces, else keywords, declarations (e.g. of `count') and some generator
   names are elided; comments describe only what is visible.  */
16549 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16551   rtx low[2], high[2];
16553   const int single_width = mode == DImode ? 32 : 64;
16555   if (CONST_INT_P (operands[2]))
16557       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
          /* Shift counts are taken modulo twice the half width.  */
16558       count = INTVAL (operands[2]) & (single_width * 2 - 1);
          /* Maximal count: both halves become the replicated sign bit.  */
16560       if (count == single_width * 2 - 1)
16562 	  emit_move_insn (high[0], high[1]);
16563 	  emit_insn ((mode == DImode
16565 		      : gen_ashrdi3) (high[0], high[0],
16566 				      GEN_INT (single_width - 1)));
16567 	  emit_move_insn (low[0], high[0]);
          /* Count >= half width: low half receives the (further shifted)
             high half; high half becomes the sign fill.  */
16570       else if (count >= single_width)
16572 	  emit_move_insn (low[0], high[1]);
16573 	  emit_move_insn (high[0], low[0]);
16574 	  emit_insn ((mode == DImode
16576 		      : gen_ashrdi3) (high[0], high[0],
16577 				      GEN_INT (single_width - 1)));
16578 	  if (count > single_width)
16579 	    emit_insn ((mode == DImode
16581 			: gen_ashrdi3) (low[0], low[0],
16582 					GEN_INT (count - single_width)));
          /* Small constant count: shrd fills the low half from the high
             half, then the high half is arithmetically shifted.  */
16586 	  if (!rtx_equal_p (operands[0], operands[1]))
16587 	    emit_move_insn (operands[0], operands[1]);
16588 	  emit_insn ((mode == DImode
16590 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16591 	  emit_insn ((mode == DImode
16593 		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
          /* Variable count: emit shrd + sar unconditionally, then fix up
             the count >= single_width case afterwards.  */
16598       if (!rtx_equal_p (operands[0], operands[1]))
16599 	emit_move_insn (operands[0], operands[1]);
16601       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16603       emit_insn ((mode == DImode
16605 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16606       emit_insn ((mode == DImode
16608 		  : gen_ashrdi3) (high[0], high[0], operands[2]));
          /* With cmov and a scratch register the fixup is branchless:
             SCRATCH is filled with the sign bits first.  */
16610       if (TARGET_CMOVE && scratch)
16612 	  emit_move_insn (scratch, high[0]);
16613 	  emit_insn ((mode == DImode
16615 		      : gen_ashrdi3) (scratch, scratch,
16616 				      GEN_INT (single_width - 1)));
16617 	  emit_insn ((mode == DImode
16618 		      ? gen_x86_shift_adj_1
16619 		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
          /* Otherwise fall back to the branching adjustment helper.  */
16623 	  emit_insn ((mode == DImode
16624 		      ? gen_x86_shift_adj_3
16625 		      : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into operations on the two
   single-word halves; the counterpart of ix86_split_ashr that shifts in
   zeros rather than sign bits.  OPERANDS/SCRATCH/MODE as in
   ix86_split_ashr.
   NOTE(review): interior lines (braces, else keywords, declarations) are
   elided in this listing; comments describe only what is visible.  */
16630 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16632   rtx low[2], high[2];
16634   const int single_width = mode == DImode ? 32 : 64;
16636   if (CONST_INT_P (operands[2]))
16638       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
          /* Shift counts are taken modulo twice the half width.  */
16639       count = INTVAL (operands[2]) & (single_width * 2 - 1);
          /* Count >= half width: low receives the shifted high half and
             the high half is cleared (logical shift fills with zeros).  */
16641       if (count >= single_width)
16643 	  emit_move_insn (low[0], high[1]);
16644 	  ix86_expand_clear (high[0]);
16646 	  if (count > single_width)
16647 	    emit_insn ((mode == DImode
16649 			: gen_lshrdi3) (low[0], low[0],
16650 					GEN_INT (count - single_width)));
          /* Small constant count: shrd + logical shift of the high half.  */
16654 	  if (!rtx_equal_p (operands[0], operands[1]))
16655 	    emit_move_insn (operands[0], operands[1]);
16656 	  emit_insn ((mode == DImode
16658 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16659 	  emit_insn ((mode == DImode
16661 		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
          /* Variable count: emit shrd/shr, then adjust for counts of at
             least single_width.  */
16666       if (!rtx_equal_p (operands[0], operands[1]))
16667 	emit_move_insn (operands[0], operands[1]);
16669       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16671       emit_insn ((mode == DImode
16673 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16674       emit_insn ((mode == DImode
16676 		  : gen_lshrdi3) (high[0], high[0], operands[2]));
16678       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
16679       if (TARGET_CMOVE && scratch)
16681 	  ix86_expand_clear (scratch);
16682 	  emit_insn ((mode == DImode
16683 		      ? gen_x86_shift_adj_1
16684 		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
          /* No cmov/scratch: use the branching adjustment helper.  */
16688 	  emit_insn ((mode == DImode
16689 		      ? gen_x86_shift_adj_2
16690 		      : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16694 /* Predict just emitted jump instruction to be taken with probability PROB.
      PROB is on the REG_BR_PROB_BASE scale.  Attaches a REG_BR_PROB note to
      the last emitted insn, which is asserted to be a jump.
      NOTE(review): the left-hand side of the assignment on line 16701
      (presumably REG_NOTES (insn)) and the note's remaining operands are
      elided in this listing -- confirm against the original file.  */
16696 predict_jump (int prob)
16698   rtx insn = get_last_insn ();
16699   gcc_assert (JUMP_P (insn));
16701     = gen_rtx_EXPR_LIST (REG_BR_PROB,
16706 /* Helper function for the string operations below.  Test VARIABLE whether
16707    it is aligned to VALUE bytes.  If true, jump to the label.  */
/* Emits VARIABLE & VALUE into a temporary and a compare-and-jump that
   branches to a fresh label when the masked bits are zero (i.e. when
   VARIABLE is VALUE-aligned); returns that label (return statement elided
   in this listing).  EPILOGUE selects the branch-probability hint emitted
   below (the if/else around the two predict_jump calls is elided).  */
16709 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16711   rtx label = gen_label_rtx ();
16712   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
      /* Mask off the low bits in the mode VARIABLE already has.  */
16713   if (GET_MODE (variable) == DImode)
16714     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16716     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16717   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16720     predict_jump (REG_BR_PROB_BASE * 50 / 100);
16722     predict_jump (REG_BR_PROB_BASE * 90 / 100);
16726 /* Decrement COUNTREG by VALUE: emits an add of -VALUE in COUNTREG's own
      mode (DImode or, implicitly in the elided else branch, SImode).  */
16728 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16730   if (GET_MODE (countreg) == DImode)
16731     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16733     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16736 /* Zero extend possibly SImode EXP to Pmode register.
      Constants (VOIDmode) are simply forced into a Pmode register; values
      already in Pmode are copied to a fresh register; otherwise a
      zero_extendsidi2 into a new Pmode register is emitted (the final
      `return r;' is elided in this listing).  */
16738 ix86_zero_extend_to_Pmode (rtx exp)
16741   if (GET_MODE (exp) == VOIDmode)
16742     return force_reg (Pmode, exp);
16743   if (GET_MODE (exp) == Pmode)
16744     return copy_to_mode_reg (Pmode, exp);
16745   r = gen_reg_rtx (Pmode);
16746   emit_insn (gen_zero_extendsidi2 (r, exp));
16750 /* Divide COUNTREG by SCALE.  SCALE must be a power of two (the division
      is implemented as a logical right shift by exact_log2 (SCALE)).
      Constant counts are divided at compile time.
      NOTE(review): lines are elided here (likely a SCALE == 1 early return
      and the final return of `sc') -- confirm against the original file.  */
16752 scale_counter (rtx countreg, int scale)
16755   rtx piece_size_mask;
16759   if (CONST_INT_P (countreg))
16760     return GEN_INT (INTVAL (countreg) / scale);
16761   gcc_assert (REG_P (countreg));
16763   piece_size_mask = GEN_INT (scale - 1);
16764   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16765 			    GEN_INT (exact_log2 (scale)),
16766 			    NULL, 1, OPTAB_DIRECT);
16770 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
16771    DImode for constant loop counts.
      Non-VOIDmode expressions keep their own mode; otherwise a CONST_INT
      gets DImode only when it does not fit in 32 bits on a 64-bit target
      (the return statements for the non-CONST_INT and small-constant cases
      are elided in this listing).  */
16773 static enum machine_mode
16774 counter_mode (rtx count_exp)
16776   if (GET_MODE (count_exp) != VOIDmode)
16777     return GET_MODE (count_exp);
16778   if (GET_CODE (count_exp) != CONST_INT)
16780   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16785 /* When SRCPTR is non-NULL, output simple loop to move memory
16786    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16787    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
16788    equivalent loop to set memory by VALUE (supposed to be in MODE).
16790    The size is rounded down to whole number of chunk size moved at once.
16791    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
16795 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16796 			       rtx destptr, rtx srcptr, rtx value,
16797 			       rtx count, enum machine_mode mode, int unroll,
      /* SIZE = COUNT rounded down to a multiple of the unrolled chunk;
         ITER counts bytes processed so far.  */
16800   rtx out_label, top_label, iter, tmp;
16801   enum machine_mode iter_mode = counter_mode (count);
16802   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16803   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16809   top_label = gen_label_rtx ();
16810   out_label = gen_label_rtx ();
16811   iter = gen_reg_rtx (iter_mode);
16813   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16814 			      NULL, 1, OPTAB_DIRECT);
16815   /* Those two should combine.  */
16816   if (piece_size == const1_rtx)
16818       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16820       predict_jump (REG_BR_PROB_BASE * 10 / 100);
16822   emit_move_insn (iter, const0_rtx);
16824   emit_label (top_label);
      /* Address the current chunk as *(destptr + iter) (and likewise for
         the source in the copy case).  */
16826   tmp = convert_modes (Pmode, iter_mode, iter, true);
16827   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16828   destmem = change_address (destmem, mode, x_addr);
16832       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16833       srcmem = change_address (srcmem, mode, y_addr);
16835       /* When unrolling for chips that reorder memory reads and writes,
16836 	 we can save registers by using single temporary.
16837 	 Also using 4 temporaries is overkill in 32bit mode.  */
      /* NOTE(review): the `&& 0' below deliberately disables this branch;
         it is dead code kept for reference, not an accidental bug.  */
16838       if (!TARGET_64BIT && 0)
16840 	  for (i = 0; i < unroll; i++)
16845 		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16847 		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16849 	      emit_move_insn (destmem, srcmem);
      /* Live path: load all UNROLL chunks into temporaries first, then
         store them, so loads and stores are not interleaved.  */
16855 	  gcc_assert (unroll <= 4);
16856 	  for (i = 0; i < unroll; i++)
16858 	      tmpreg[i] = gen_reg_rtx (mode);
16862 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16864 	      emit_move_insn (tmpreg[i], srcmem);
16866 	  for (i = 0; i < unroll; i++)
16871 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16873 	      emit_move_insn (destmem, tmpreg[i]);
      /* memset case (SRCPTR == NULL): store VALUE UNROLL times.  */
16878       for (i = 0; i < unroll; i++)
16882 	      adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16883 	  emit_move_insn (destmem, value);
      /* Advance ITER and loop while ITER < SIZE; pick the branch
         probability from EXPECTED_SIZE when the caller supplied one.  */
16886   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16887 			     true, OPTAB_LIB_WIDEN);
16889     emit_move_insn (iter, tmp);
16891   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
16893   if (expected_size != -1)
16895       expected_size /= GET_MODE_SIZE (mode) * unroll;
16896       if (expected_size == 0)
16898       else if (expected_size > REG_BR_PROB_BASE)
16899 	predict_jump (REG_BR_PROB_BASE - 1);
16901 	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16904     predict_jump (REG_BR_PROB_BASE * 80 / 100);
      /* Finally bump the live pointers past the copied region.  */
16905   iter = ix86_zero_extend_to_Pmode (iter);
16906   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16907 			     true, OPTAB_LIB_WIDEN);
16908   if (tmp != destptr)
16909     emit_move_insn (destptr, tmp);
16912       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16913 				 true, OPTAB_LIB_WIDEN);
16915 	emit_move_insn (srcptr, tmp);
16917   emit_label (out_label);
16920 /* Output "rep; mov" instruction.
16921    Arguments have same meaning as for previous function  */
16923 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16924 			   rtx destptr, rtx srcptr,
16926 			   enum machine_mode mode)
16932   /* If the size is known, it is shorter to use rep movs.  */
16933   if (mode == QImode && CONST_INT_P (count)
16934       && !(INTVAL (count) & 3))
      /* The rep pattern wants BLKmode MEMs addressed exactly by the
         pointer registers.  */
16937   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16938     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16939   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16940     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
      /* COUNTREG holds the element count (bytes / chunk size), in Pmode.  */
16941   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
      /* DESTEXP/SRCEXP describe the final pointer values
         (ptr + countreg << log2 (chunk size)) for the rep_mov pattern.  */
16942   if (mode != QImode)
16944       destexp = gen_rtx_ASHIFT (Pmode, countreg,
16945 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16946       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16947       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16948 			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16949       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16953       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16954       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
      /* Keep the MEM size attributes honest: known byte count rounded down
         to whole chunks, or no size at all for a variable count.  */
16956   if (CONST_INT_P (count))
16958       count = GEN_INT (INTVAL (count)
16959 		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
16960       destmem = shallow_copy_rtx (destmem);
16961       srcmem = shallow_copy_rtx (srcmem);
16962       set_mem_size (destmem, count);
16963       set_mem_size (srcmem, count);
16967       if (MEM_SIZE (destmem))
16968 	set_mem_size (destmem, NULL_RTX);
16969       if (MEM_SIZE (srcmem))
16970 	set_mem_size (srcmem, NULL_RTX);
16972   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16976 /* Output "rep; stos" instruction.
16977    Arguments have same meaning as for previous function  */
16979 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16980 			    rtx count, enum machine_mode mode,
      /* The rep pattern wants a BLKmode MEM addressed by DESTPTR.  */
16986   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16987     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16988   value = force_reg (mode, gen_lowpart (mode, value));
      /* COUNTREG holds the element count (bytes / chunk size), in Pmode.  */
16989   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
      /* DESTEXP is the final destination pointer for the rep_stos pattern.  */
16990   if (mode != QImode)
16992       destexp = gen_rtx_ASHIFT (Pmode, countreg,
16993 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16994       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16997     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      /* Size attribute bookkeeping: only a zero fill with a constant count
         keeps a (rounded-down) known size on the MEM.  */
16998   if (orig_value == const0_rtx && CONST_INT_P (count))
17000       count = GEN_INT (INTVAL (count)
17001 		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17002       destmem = shallow_copy_rtx (destmem);
17003       set_mem_size (destmem, count);
17005   else if (MEM_SIZE (destmem))
17006     set_mem_size (destmem, NULL_RTX);
17007   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one movs-style copy of a single MODE-sized chunk at byte OFFSET
   from SRCMEM/SRCPTR to DESTMEM/DESTPTR; the strmov pattern also advances
   both pointer registers by the chunk size.  */
17011 emit_strmov (rtx destmem, rtx srcmem,
17012 	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17014   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17015   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17016   emit_insn (gen_strmov (destptr, dest, srcptr, src));
17019 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
/* Constant counts are handled by an unconditional jump-free sequence of
   chunk moves selected by the low bits of the count; variable counts
   either fall back to a byte loop (MAX_SIZE > 4) or emit one aligntest-
   guarded move per power of two below MAX_SIZE.
   NOTE(review): interior lines (braces, else keywords, declarations of
   src/dest/tmp, some TARGET_64BIT branches) are elided in this listing.  */
17021 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17022 			rtx destptr, rtx srcptr, rtx count, int max_size)
17025   if (CONST_INT_P (count))
17027       HOST_WIDE_INT countval = INTVAL (count);
      /* Peel one chunk per set bit of the residual count, largest first.  */
17030       if ((countval & 0x10) && max_size > 16)
17034 	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17035 	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17038 	    gcc_unreachable ();
17041       if ((countval & 0x08) && max_size > 8)
17044 	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17047 	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17048 	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17052       if ((countval & 0x04) && max_size > 4)
17054 	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17057       if ((countval & 0x02) && max_size > 2)
17059 	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17062       if ((countval & 0x01) && max_size > 1)
17064 	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
      /* Variable count above 4 bytes: byte-copy loop over the residue.  */
17071       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17072 				   count, 1, OPTAB_DIRECT);
17073       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17074 				     count, QImode, 1, 4);
17078   /* When there are stringops, we can cheaply increase dest and src pointers.
17079      Otherwise we save code size by maintaining offset (zero is readily
17080      available from preceding rep operation) and using x86 addressing modes.
17082   if (TARGET_SINGLE_STRINGOP)
17086 	  rtx label = ix86_expand_aligntest (count, 4, true);
17087 	  src = change_address (srcmem, SImode, srcptr);
17088 	  dest = change_address (destmem, SImode, destptr);
17089 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17090 	  emit_label (label);
17091 	  LABEL_NUSES (label) = 1;
17095 	  rtx label = ix86_expand_aligntest (count, 2, true);
17096 	  src = change_address (srcmem, HImode, srcptr);
17097 	  dest = change_address (destmem, HImode, destptr);
17098 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17099 	  emit_label (label);
17100 	  LABEL_NUSES (label) = 1;
17104 	  rtx label = ix86_expand_aligntest (count, 1, true);
17105 	  src = change_address (srcmem, QImode, srcptr);
17106 	  dest = change_address (destmem, QImode, destptr);
17107 	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17108 	  emit_label (label);
17109 	  LABEL_NUSES (label) = 1;
      /* No single stringop: keep a running OFFSET register and use plain
         moves through ptr+offset addresses instead.  */
17114       rtx offset = force_reg (Pmode, const0_rtx);
17119 	  rtx label = ix86_expand_aligntest (count, 4, true);
17120 	  src = change_address (srcmem, SImode, srcptr);
17121 	  dest = change_address (destmem, SImode, destptr);
17122 	  emit_move_insn (dest, src);
17123 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17124 				     true, OPTAB_LIB_WIDEN);
17126 	    emit_move_insn (offset, tmp);
17127 	  emit_label (label);
17128 	  LABEL_NUSES (label) = 1;
17132 	  rtx label = ix86_expand_aligntest (count, 2, true);
17133 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17134 	  src = change_address (srcmem, HImode, tmp);
17135 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17136 	  dest = change_address (destmem, HImode, tmp);
17137 	  emit_move_insn (dest, src);
17138 	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17139 				     true, OPTAB_LIB_WIDEN);
17141 	    emit_move_insn (offset, tmp);
17142 	  emit_label (label);
17143 	  LABEL_NUSES (label) = 1;
17147 	  rtx label = ix86_expand_aligntest (count, 1, true);
17148 	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17149 	  src = change_address (srcmem, QImode, tmp);
17150 	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17151 	  dest = change_address (destmem, QImode, tmp);
17152 	  emit_move_insn (dest, src);
17153 	  emit_label (label);
17154 	  LABEL_NUSES (label) = 1;
17159 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
/* Masks COUNT down to the residue below MAX_SIZE and delegates to the
   generic byte-store loop with VALUE narrowed to QImode.  */
17161 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17162 				 rtx count, int max_size)
17165     expand_simple_binop (counter_mode (count), AND, count,
17166 			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17167   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17168 				 gen_lowpart (QImode, value), count, QImode,
17172 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
/* Store-side counterpart of expand_movmem_epilogue: constant residues are
   peeled one chunk per set count bit; variable residues either use the
   byte loop (via expand_setmem_epilogue_via_loop, for larger MAX_SIZE) or
   one aligntest-guarded strset per power of two below MAX_SIZE.
   NOTE(review): braces, else keywords and some TARGET_64BIT branches are
   elided in this listing.  */
17174 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17178   if (CONST_INT_P (count))
17180       HOST_WIDE_INT countval = INTVAL (count);
17183       if ((countval & 0x10) && max_size > 16)
17187 	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17188 	      emit_insn (gen_strset (destptr, dest, value));
17189 	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17190 	      emit_insn (gen_strset (destptr, dest, value));
17193 	    gcc_unreachable ();
17196       if ((countval & 0x08) && max_size > 8)
17200 	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17201 	      emit_insn (gen_strset (destptr, dest, value));
17205 	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17206 	      emit_insn (gen_strset (destptr, dest, value));
17207 	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17208 	      emit_insn (gen_strset (destptr, dest, value));
17212       if ((countval & 0x04) && max_size > 4)
17214 	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17215 	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17218       if ((countval & 0x02) && max_size > 2)
17220 	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17221 	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17224       if ((countval & 0x01) && max_size > 1)
17226 	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17227 	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      /* Variable count, larger MAX_SIZE: fall back to the byte loop.  */
17234       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      /* Guarded stores, one block per power of two below MAX_SIZE.  */
17239       rtx label = ix86_expand_aligntest (count, 16, true);
17242 	  dest = change_address (destmem, DImode, destptr);
17243 	  emit_insn (gen_strset (destptr, dest, value));
17244 	  emit_insn (gen_strset (destptr, dest, value));
17248 	  dest = change_address (destmem, SImode, destptr);
17249 	  emit_insn (gen_strset (destptr, dest, value));
17250 	  emit_insn (gen_strset (destptr, dest, value));
17251 	  emit_insn (gen_strset (destptr, dest, value));
17252 	  emit_insn (gen_strset (destptr, dest, value));
17254       emit_label (label);
17255       LABEL_NUSES (label) = 1;
17259       rtx label = ix86_expand_aligntest (count, 8, true);
17262 	  dest = change_address (destmem, DImode, destptr);
17263 	  emit_insn (gen_strset (destptr, dest, value));
17267 	  dest = change_address (destmem, SImode, destptr);
17268 	  emit_insn (gen_strset (destptr, dest, value));
17269 	  emit_insn (gen_strset (destptr, dest, value));
17271       emit_label (label);
17272       LABEL_NUSES (label) = 1;
17276       rtx label = ix86_expand_aligntest (count, 4, true);
17277       dest = change_address (destmem, SImode, destptr);
17278       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17279       emit_label (label);
17280       LABEL_NUSES (label) = 1;
17284       rtx label = ix86_expand_aligntest (count, 2, true);
17285       dest = change_address (destmem, HImode, destptr);
17286       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17287       emit_label (label);
17288       LABEL_NUSES (label) = 1;
17292       rtx label = ix86_expand_aligntest (count, 1, true);
17293       dest = change_address (destmem, QImode, destptr);
17294       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17295       emit_label (label);
17296       LABEL_NUSES (label) = 1;
17300 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17301    DESIRED_ALIGNMENT.  */
/* For each alignment step (1 -> 2 -> 4 -> 8) that may be needed, emit an
   aligntest-guarded single-chunk copy and decrement COUNT accordingly; the
   guard skips the copy when DESTPTR is already aligned at that level.  */
17303 expand_movmem_prologue (rtx destmem, rtx srcmem,
17304 			rtx destptr, rtx srcptr, rtx count,
17305 			int align, int desired_alignment)
17307   if (align <= 1 && desired_alignment > 1)
17309       rtx label = ix86_expand_aligntest (destptr, 1, false);
17310       srcmem = change_address (srcmem, QImode, srcptr);
17311       destmem = change_address (destmem, QImode, destptr);
17312       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17313       ix86_adjust_counter (count, 1);
17314       emit_label (label);
17315       LABEL_NUSES (label) = 1;
17317   if (align <= 2 && desired_alignment > 2)
17319       rtx label = ix86_expand_aligntest (destptr, 2, false);
17320       srcmem = change_address (srcmem, HImode, srcptr);
17321       destmem = change_address (destmem, HImode, destptr);
17322       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17323       ix86_adjust_counter (count, 2);
17324       emit_label (label);
17325       LABEL_NUSES (label) = 1;
17327   if (align <= 4 && desired_alignment > 4)
17329       rtx label = ix86_expand_aligntest (destptr, 4, false);
17330       srcmem = change_address (srcmem, SImode, srcptr);
17331       destmem = change_address (destmem, SImode, destptr);
17332       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17333       ix86_adjust_counter (count, 4);
17334       emit_label (label);
17335       LABEL_NUSES (label) = 1;
      /* Only alignments up to 8 bytes are supported by this ladder.  */
17337   gcc_assert (desired_alignment <= 8);
17340 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17341 ALIGN_BYTES is how many bytes need to be copied. */
17343 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17344 int desired_align, int align_bytes)
17347 rtx src_size, dst_size;
17349 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17350 if (src_align_bytes >= 0)
17351 src_align_bytes = desired_align - src_align_bytes;
17352 src_size = MEM_SIZE (src);
17353 dst_size = MEM_SIZE (dst);
17354 if (align_bytes & 1)
17356 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17357 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17359 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17361 if (align_bytes & 2)
17363 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17364 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17365 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17366 set_mem_align (dst, 2 * BITS_PER_UNIT);
17367 if (src_align_bytes >= 0
17368 && (src_align_bytes & 1) == (align_bytes & 1)
17369 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17370 set_mem_align (src, 2 * BITS_PER_UNIT);
17372 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17374 if (align_bytes & 4)
17376 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17377 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17378 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17379 set_mem_align (dst, 4 * BITS_PER_UNIT);
17380 if (src_align_bytes >= 0)
17382 unsigned int src_align = 0;
17383 if ((src_align_bytes & 3) == (align_bytes & 3))
17385 else if ((src_align_bytes & 1) == (align_bytes & 1))
17387 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17388 set_mem_align (src, src_align * BITS_PER_UNIT);
17391 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17393 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17394 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17395 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17396 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17397 if (src_align_bytes >= 0)
17399 unsigned int src_align = 0;
17400 if ((src_align_bytes & 7) == (align_bytes & 7))
17402 else if ((src_align_bytes & 3) == (align_bytes & 3))
17404 else if ((src_align_bytes & 1) == (align_bytes & 1))
17406 if (src_align > (unsigned int) desired_align)
17407 src_align = desired_align;
17408 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17409 set_mem_align (src, src_align * BITS_PER_UNIT);
17412 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17414 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17419 /* Set enough from DEST to align DEST known to be aligned by ALIGN to
17420    DESIRED_ALIGNMENT.  */
/* Store-side counterpart of expand_movmem_prologue: one aligntest-guarded
   strset plus a COUNT decrement per alignment step (1 -> 2 -> 4 -> 8).  */
17422 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17423 			int align, int desired_alignment)
17425   if (align <= 1 && desired_alignment > 1)
17427       rtx label = ix86_expand_aligntest (destptr, 1, false);
17428       destmem = change_address (destmem, QImode, destptr);
17429       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17430       ix86_adjust_counter (count, 1);
17431       emit_label (label);
17432       LABEL_NUSES (label) = 1;
17434   if (align <= 2 && desired_alignment > 2)
17436       rtx label = ix86_expand_aligntest (destptr, 2, false);
17437       destmem = change_address (destmem, HImode, destptr);
17438       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17439       ix86_adjust_counter (count, 2);
17440       emit_label (label);
17441       LABEL_NUSES (label) = 1;
17443   if (align <= 4 && desired_alignment > 4)
17445       rtx label = ix86_expand_aligntest (destptr, 4, false);
17446       destmem = change_address (destmem, SImode, destptr);
17447       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17448       ix86_adjust_counter (count, 4);
17449       emit_label (label);
17450       LABEL_NUSES (label) = 1;
      /* Only alignments up to 8 bytes are supported by this ladder.  */
17452   gcc_assert (desired_alignment <= 8);
17455 /* Set enough from DST to align DST known to by aligned by ALIGN to
17456    DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
/* Store-side counterpart of expand_constant_movmem_prologue: the byte
   count is known exactly, so stores are emitted unconditionally and the
   MEM's alignment attribute is raised step by step.
   NOTE(review): lines are elided in this listing -- e.g. `off' updates and
   the guard (presumably `if (dst_size)') before the final set_mem_size;
   without that guard INTVAL on a NULL size would be invalid.  Confirm
   against the original file.  */
17458 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17459 				 int desired_align, int align_bytes)
17462   rtx dst_size = MEM_SIZE (dst);
17463   if (align_bytes & 1)
17465       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17467       emit_insn (gen_strset (destreg, dst,
17468 			     gen_lowpart (QImode, value)));
17470   if (align_bytes & 2)
17472       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17473       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17474 	set_mem_align (dst, 2 * BITS_PER_UNIT);
17476       emit_insn (gen_strset (destreg, dst,
17477 			     gen_lowpart (HImode, value)));
17479   if (align_bytes & 4)
17481       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17482       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17483 	set_mem_align (dst, 4 * BITS_PER_UNIT);
17485       emit_insn (gen_strset (destreg, dst,
17486 			     gen_lowpart (SImode, value)));
      /* Rebuild a BLKmode MEM for the bulk store with the new alignment
         and a size shrunk by the peeled prefix.  */
17488   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17489   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17490     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17492     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17496 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17497 static enum stringop_alg
17498 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17499 int *dynamic_check)
17501 const struct stringop_algs * algs;
17502 bool optimize_for_speed;
17503 /* Algorithms using the rep prefix want at least edi and ecx;
17504 additionally, memset wants eax and memcpy wants esi. Don't
17505 consider such algorithms if the user has appropriated those
17506 registers for their own purposes. */
17507 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17509 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17511 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17512 || (alg != rep_prefix_1_byte \
17513 && alg != rep_prefix_4_byte \
17514 && alg != rep_prefix_8_byte))
17515 const struct processor_costs *cost;
17517 /* Even if the string operation call is cold, we still might spend a lot
17518 of time processing large blocks. */
17519 if (optimize_function_for_size_p (cfun)
17520 || (optimize_insn_for_size_p ()
17521 && expected_size != -1 && expected_size < 256))
17522 optimize_for_speed = false;
17524 optimize_for_speed = true;
17526 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17528 *dynamic_check = -1;
17530 algs = &cost->memset[TARGET_64BIT != 0];
17532 algs = &cost->memcpy[TARGET_64BIT != 0];
17533 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17534 return stringop_alg;
17535 /* rep; movq or rep; movl is the smallest variant. */
17536 else if (!optimize_for_speed)
17538 if (!count || (count & 3))
17539 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17541 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17543 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17545 else if (expected_size != -1 && expected_size < 4)
17546 return loop_1_byte;
17547 else if (expected_size != -1)
17550 enum stringop_alg alg = libcall;
17551 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17553 /* We get here if the algorithms that were not libcall-based
17554 were rep-prefix based and we are unable to use rep prefixes
17555 based on global register usage. Break out of the loop and
17556 use the heuristic below. */
17557 if (algs->size[i].max == 0)
17559 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17561 enum stringop_alg candidate = algs->size[i].alg;
17563 if (candidate != libcall && ALG_USABLE_P (candidate))
17565 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17566 last non-libcall inline algorithm. */
17567 if (TARGET_INLINE_ALL_STRINGOPS)
17569 /* When the current size is best to be copied by a libcall,
17570 but we are still forced to inline, run the heuristic below
17571 that will pick code for medium sized blocks. */
17572 if (alg != libcall)
17576 else if (ALG_USABLE_P (candidate))
17580 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17582 /* When asked to inline the call anyway, try to pick meaningful choice.
17583 We look for maximal size of block that is faster to copy by hand and
17584 take blocks of at most of that size guessing that average size will
17585 be roughly half of the block.
17587 If this turns out to be bad, we might simply specify the preferred
17588 choice in ix86_costs. */
17589 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17590 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17593 enum stringop_alg alg;
17595 bool any_alg_usable_p = true;
17597 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17599 enum stringop_alg candidate = algs->size[i].alg;
17600 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17602 if (candidate != libcall && candidate
17603 && ALG_USABLE_P (candidate))
17604 max = algs->size[i].max;
17606 /* If there aren't any usable algorithms, then recursing on
17607 smaller sizes isn't going to find anything. Just return the
17608 simple byte-at-a-time copy loop. */
17609 if (!any_alg_usable_p)
17611 /* Pick something reasonable. */
17612 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17613 *dynamic_check = 128;
17614 return loop_1_byte;
17618 alg = decide_alg (count, max / 2, memset, dynamic_check);
17619 gcc_assert (*dynamic_check == -1);
17620 gcc_assert (alg != libcall);
17621 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17622 *dynamic_check = max;
17625 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17626 #undef ALG_USABLE_P
17629 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
17630    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
/* Returns the alignment the prologue should establish before running ALG.
   NOTE(review): this is the interior of a switch whose head, several case
   labels and assignments are elided in this listing; only the visible
   cases are commented.  */
17632 decide_alignment (int align,
17633 		  enum stringop_alg alg,
17636   int desired_align = 0;
17640       gcc_unreachable ();
17642     case unrolled_loop:
      /* The unrolled loop moves word-sized chunks; align to the word.  */
17643       desired_align = GET_MODE_SIZE (Pmode);
17645     case rep_prefix_8_byte:
17648     case rep_prefix_4_byte:
17649       /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17650 	 copying whole cacheline at once.  */
17651       if (TARGET_PENTIUMPRO)
17656     case rep_prefix_1_byte:
17657       /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17658 	 copying whole cacheline at once.  */
17659       if (TARGET_PENTIUMPRO)
      /* Never go below the guaranteed alignment, and do not bother aligning
	 blocks expected to be smaller than 4 bytes.  */
17673   if (desired_align < align)
17674     desired_align = align;
17675   if (expected_size != -1 && expected_size < 4)
17676     desired_align = align;
17677   return desired_align;
17680 /* Return the smallest power of 2 greater than VAL. */
/* Used below to round EPILOGUE_SIZE_NEEDED up to a power of two so the
   epilogue can mask the residual count with COUNT & (size - 1).
   NOTE(review): the function body is elided in this view.  */
17682 smallest_pow2_greater_than (int val)
17690 /* Expand string move (memcpy) operation. Use i386 string operations when
17691 profitable. expand_setmem contains similar code. The code depends upon
17692 architecture, block size and alignment, but always has the same
17695 1) Prologue guard: Conditional that jumps up to epilogues for small
17696 blocks that can be handled by epilogue alone. This is faster but
17697 also needed for correctness, since prologue assume the block is larger
17698 than the desired alignment.
17700 Optional dynamic check for size and libcall for large
17701 blocks is emitted here too, with -minline-stringops-dynamically.
17703 2) Prologue: copy first few bytes in order to get destination aligned
17704 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17705 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17706 We emit either a jump tree on power of two sized blocks, or a byte loop.
17708 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17709 with specified algorithm.
17711 4) Epilogue: code copying tail of the block that is too small to be
17712 handled by main body (or up to size guarded by prologue guard). */
/* DST and SRC are BLKmode MEMs; COUNT_EXP is the byte count;
   ALIGN_EXP is the known alignment; EXPECTED_ALIGN_EXP and
   EXPECTED_SIZE_EXP come from profile feedback (CONST_INT or unknown).
   NOTE(review): many interior lines (braces, case labels, some
   statements) are elided in this view — confirm details against the
   full source before modifying.  */
17715 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17716 rtx expected_align_exp, rtx expected_size_exp)
17722 rtx jump_around_label = NULL;
17723 HOST_WIDE_INT align = 1;
17724 unsigned HOST_WIDE_INT count = 0;
17725 HOST_WIDE_INT expected_size = -1;
17726 int size_needed = 0, epilogue_size_needed;
17727 int desired_align = 0, align_bytes = 0;
17728 enum stringop_alg alg;
17730 bool need_zero_guard = false;
17732 if (CONST_INT_P (align_exp))
17733 align = INTVAL (align_exp);
17734 /* i386 can do misaligned access on reasonably increased cost. */
17735 if (CONST_INT_P (expected_align_exp)
17736 && INTVAL (expected_align_exp) > align)
17737 align = INTVAL (expected_align_exp);
17738 /* ALIGN is the minimum of destination and source alignment, but we care here
17739 just about destination alignment. */
17740 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17741 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17743 if (CONST_INT_P (count_exp))
17744 count = expected_size = INTVAL (count_exp);
17745 if (CONST_INT_P (expected_size_exp) && count == 0)
17746 expected_size = INTVAL (expected_size_exp);
17748 /* Make sure we don't need to care about overflow later on. */
17749 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17752 /* Step 0: Decide on preferred algorithm, desired alignment and
17753 size of chunks to be copied by main loop. */
17755 alg = decide_alg (count, expected_size, false, &dynamic_check);
17756 desired_align = decide_alignment (align, alg, expected_size);
17758 if (!TARGET_ALIGN_STRINGOPS)
17759 align = desired_align;
17761 if (alg == libcall)
17763 gcc_assert (alg != no_stringop);
/* Materialize the count and both addresses in registers.  */
17765 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17766 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17767 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17772 gcc_unreachable ();
17774 need_zero_guard = true;
17775 size_needed = GET_MODE_SIZE (Pmode);
17777 case unrolled_loop:
17778 need_zero_guard = true;
/* The unrolled loop consumes 4 words per iteration on 64-bit,
   2 on 32-bit (see the matching unroll factor in Step 3).  */
17779 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17781 case rep_prefix_8_byte:
17784 case rep_prefix_4_byte:
17787 case rep_prefix_1_byte:
17791 need_zero_guard = true;
17796 epilogue_size_needed = size_needed;
17798 /* Step 1: Prologue guard. */
17800 /* Alignment code needs count to be in register. */
17801 if (CONST_INT_P (count_exp) && desired_align > align)
17803 if (INTVAL (count_exp) > desired_align
17804 && INTVAL (count_exp) > size_needed)
17807 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17808 if (align_bytes <= 0)
17811 align_bytes = desired_align - align_bytes;
17813 if (align_bytes == 0)
17814 count_exp = force_reg (counter_mode (count_exp), count_exp);
17816 gcc_assert (desired_align >= 1 && align >= 1);
17818 /* Ensure that alignment prologue won't copy past end of block. */
17819 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17821 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17822 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17823 Make sure it is power of 2. */
17824 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17828 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
17830 /* If main algorithm works on QImode, no epilogue is needed.
17831 For small sizes just don't align anything. */
17832 if (size_needed == 1)
17833 desired_align = align;
/* Runtime guard: blocks smaller than EPILOGUE_SIZE_NEEDED jump
   straight to the epilogue.  */
17840 label = gen_label_rtx ();
17841 emit_cmp_and_jump_insns (count_exp,
17842 GEN_INT (epilogue_size_needed),
17843 LTU, 0, counter_mode (count_exp), 1, label);
17844 if (expected_size == -1 || expected_size < epilogue_size_needed)
17845 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17847 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17851 /* Emit code to decide on runtime whether library call or inline should be
17853 if (dynamic_check != -1)
17855 if (CONST_INT_P (count_exp))
/* Known-large block: call the library unconditionally.  */
17857 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17859 emit_block_move_via_libcall (dst, src, count_exp, false);
17860 count_exp = const0_rtx;
17866 rtx hot_label = gen_label_rtx ();
17867 jump_around_label = gen_label_rtx ();
17868 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17869 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17870 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17871 emit_block_move_via_libcall (dst, src, count_exp, false);
17872 emit_jump (jump_around_label);
17873 emit_label (hot_label);
17877 /* Step 2: Alignment prologue. */
17879 if (desired_align > align)
17881 if (align_bytes == 0)
17883 /* Except for the first move in epilogue, we no longer know
17884 constant offset in aliasing info. It don't seems to worth
17885 the pain to maintain it for the first move, so throw away
17887 src = change_address (src, BLKmode, srcreg);
17888 dst = change_address (dst, BLKmode, destreg);
17889 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17894 /* If we know how many bytes need to be stored before dst is
17895 sufficiently aligned, maintain aliasing info accurately. */
17896 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
17897 desired_align, align_bytes);
/* The prologue consumed ALIGN_BYTES; shrink the remaining count.  */
17898 count_exp = plus_constant (count_exp, -align_bytes);
17899 count -= align_bytes;
17901 if (need_zero_guard
17902 && (count < (unsigned HOST_WIDE_INT) size_needed
17903 || (align_bytes == 0
17904 && count < ((unsigned HOST_WIDE_INT) size_needed
17905 + desired_align - align))))
17907 /* It is possible that we copied enough so the main loop will not
17909 gcc_assert (size_needed > 1);
17910 if (label == NULL_RTX)
17911 label = gen_label_rtx ();
17912 emit_cmp_and_jump_insns (count_exp,
17913 GEN_INT (size_needed),
17914 LTU, 0, counter_mode (count_exp), 1, label);
17915 if (expected_size == -1
17916 || expected_size < (desired_align - align) / 2 + size_needed)
17917 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17919 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17922 if (label && size_needed == 1)
17924 emit_label (label);
17925 LABEL_NUSES (label) = 1;
17927 epilogue_size_needed = 1;
17929 else if (label == NULL_RTX)
17930 epilogue_size_needed = size_needed;
17932 /* Step 3: Main loop. */
17938 gcc_unreachable ();
17940 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17941 count_exp, QImode, 1, expected_size);
17944 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17945 count_exp, Pmode, 1, expected_size);
17947 case unrolled_loop:
17948 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17949 registers for 4 temporaries anyway. */
17950 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17951 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17954 case rep_prefix_8_byte:
17955 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17958 case rep_prefix_4_byte:
17959 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17962 case rep_prefix_1_byte:
17963 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17967 /* Adjust properly the offset of src and dest memory for aliasing. */
17968 if (CONST_INT_P (count_exp))
17970 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17971 (count / size_needed) * size_needed);
17972 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17973 (count / size_needed) * size_needed);
17977 src = change_address (src, BLKmode, srcreg);
17978 dst = change_address (dst, BLKmode, destreg);
17981 /* Step 4: Epilogue to copy the remaining bytes. */
17985 /* When the main loop is done, COUNT_EXP might hold original count,
17986 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17987 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17988 bytes. Compensate if needed. */
17990 if (size_needed < epilogue_size_needed)
17993 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17994 GEN_INT (size_needed - 1), count_exp, 1,
17996 if (tmp != count_exp)
17997 emit_move_insn (count_exp, tmp);
17999 emit_label (label);
18000 LABEL_NUSES (label) = 1;
18003 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18004 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18005 epilogue_size_needed);
18006 if (jump_around_label)
18007 emit_label (jump_around_label);
18011 /* Helper function for memcpy. For QImode value 0xXY produce
18012 0xXYXYXYXY of wide specified by MODE. This is essentially
18013 a * 0x10101010, but we can do slightly better than
18014 synth_mult by unwinding the sequence by hand on CPUs with
18017 promote_duplicated_reg (enum machine_mode mode, rtx val)
18019 enum machine_mode valmode = GET_MODE (val);
/* Number of shift/or steps needed: 3 for DImode (8->16->32->64),
   2 for SImode.  */
18021 int nops = mode == DImode ? 3 : 2;
18023 gcc_assert (mode == SImode || mode == DImode);
/* Zero duplicates to zero — just load it.  */
18024 if (val == const0_rtx)
18025 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: fold the duplication at compile time.  */
18026 if (CONST_INT_P (val))
18028 HOST_WIDE_INT v = INTVAL (val) & 255;
18032 if (mode == DImode)
18033 v |= (v << 16) << 16;
18034 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18037 if (valmode == VOIDmode)
18039 if (valmode != QImode)
18040 val = gen_lowpart (QImode, val);
18041 if (mode == QImode)
18043 if (!TARGET_PARTIAL_REG_STALL)
/* Choose multiply-by-0x01010101 when it is cheaper than the
   hand-unrolled shift/or sequence, per the cost tables.  */
18045 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18046 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18047 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18048 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18050 rtx reg = convert_modes (mode, QImode, val, true);
18051 tmp = promote_duplicated_reg (mode, const1_rtx);
18052 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18057 rtx reg = convert_modes (mode, QImode, val, true);
18059 if (!TARGET_PARTIAL_REG_STALL)
/* Duplicate the low byte into the second byte with an insv.  */
18060 if (mode == SImode)
18061 emit_insn (gen_movsi_insv_1 (reg, reg));
18063 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
/* Fallback: duplicate byte 0 into byte 1 via shift-by-8 and IOR.  */
18066 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18067 NULL, 1, OPTAB_DIRECT);
18069 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Widen 16 duplicated bits to 32 ...  */
18071 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18072 NULL, 1, OPTAB_DIRECT);
18073 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18074 if (mode == SImode)
/* ... and 32 to 64 for DImode.  */
18076 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18077 NULL, 1, OPTAB_DIRECT);
18078 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18083 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18084 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18085 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) that either the main loop chunk
   size or the alignment prologue will actually store in, and returns
   VAL duplicated to that width; returns VAL unchanged if only byte
   stores are needed.
   NOTE(review): the DImode branch's guard line is partly elided here
   (presumably a TARGET_64BIT check) — confirm against full source.  */
18087 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18092 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18093 promoted_val = promote_duplicated_reg (DImode, val);
18094 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18095 promoted_val = promote_duplicated_reg (SImode, val);
18096 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18097 promoted_val = promote_duplicated_reg (HImode, val);
18099 promoted_val = val;
18101 return promoted_val;
18104 /* Expand string clear operation (bzero). Use i386 string operations when
18105 profitable. See expand_movmem comment for explanation of individual
18106 steps performed. */
/* DST is a BLKmode MEM; COUNT_EXP is the byte count; VAL_EXP is the
   fill byte; ALIGN_EXP / EXPECTED_ALIGN_EXP / EXPECTED_SIZE_EXP as in
   ix86_expand_movmem.  Mirrors the four-step structure of the memcpy
   expander, plus promotion of VAL to a wide register.
   NOTE(review): many interior lines are elided in this view.  */
18108 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18109 rtx expected_align_exp, rtx expected_size_exp)
18114 rtx jump_around_label = NULL;
18115 HOST_WIDE_INT align = 1;
18116 unsigned HOST_WIDE_INT count = 0;
18117 HOST_WIDE_INT expected_size = -1;
18118 int size_needed = 0, epilogue_size_needed;
18119 int desired_align = 0, align_bytes = 0;
18120 enum stringop_alg alg;
18121 rtx promoted_val = NULL;
/* Set when the promoted value may not be available in the epilogue,
   forcing a byte loop there (see the comment at 18222 below).  */
18122 bool force_loopy_epilogue = false;
18124 bool need_zero_guard = false;
18126 if (CONST_INT_P (align_exp))
18127 align = INTVAL (align_exp);
18128 /* i386 can do misaligned access on reasonably increased cost. */
18129 if (CONST_INT_P (expected_align_exp)
18130 && INTVAL (expected_align_exp) > align)
18131 align = INTVAL (expected_align_exp);
18132 if (CONST_INT_P (count_exp))
18133 count = expected_size = INTVAL (count_exp);
18134 if (CONST_INT_P (expected_size_exp) && count == 0)
18135 expected_size = INTVAL (expected_size_exp);
18137 /* Make sure we don't need to care about overflow later on. */
18138 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18141 /* Step 0: Decide on preferred algorithm, desired alignment and
18142 size of chunks to be copied by main loop. */
18144 alg = decide_alg (count, expected_size, true, &dynamic_check);
18145 desired_align = decide_alignment (align, alg, expected_size);
18147 if (!TARGET_ALIGN_STRINGOPS)
18148 align = desired_align;
18150 if (alg == libcall)
18152 gcc_assert (alg != no_stringop);
18154 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18155 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18160 gcc_unreachable ();
18162 need_zero_guard = true;
18163 size_needed = GET_MODE_SIZE (Pmode);
18165 case unrolled_loop:
18166 need_zero_guard = true;
18167 size_needed = GET_MODE_SIZE (Pmode) * 4;
18169 case rep_prefix_8_byte:
18172 case rep_prefix_4_byte:
18175 case rep_prefix_1_byte:
18179 need_zero_guard = true;
18183 epilogue_size_needed = size_needed;
18185 /* Step 1: Prologue guard. */
18187 /* Alignment code needs count to be in register. */
18188 if (CONST_INT_P (count_exp) && desired_align > align)
18190 if (INTVAL (count_exp) > desired_align
18191 && INTVAL (count_exp) > size_needed)
18194 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18195 if (align_bytes <= 0)
18198 align_bytes = desired_align - align_bytes;
18200 if (align_bytes == 0)
/* Use SImode for the counter when the constant count fits in
   32 bits, even on 64-bit targets.  */
18202 enum machine_mode mode = SImode;
18203 if (TARGET_64BIT && (count & ~0xffffffff))
18205 count_exp = force_reg (mode, count_exp);
18208 /* Do the cheap promotion to allow better CSE across the
18209 main loop and epilogue (ie one load of the big constant in the
18210 front of all code. */
18211 if (CONST_INT_P (val_exp))
18212 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18213 desired_align, align);
18214 /* Ensure that alignment prologue won't copy past end of block. */
18215 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18217 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18218 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18219 Make sure it is power of 2. */
18220 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18222 /* To improve performance of small blocks, we jump around the VAL
18223 promoting mode. This mean that if the promoted VAL is not constant,
18224 we might not use it in the epilogue and have to use byte
18226 if (epilogue_size_needed > 2 && !promoted_val)
18227 force_loopy_epilogue = true;
18230 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18232 /* If main algorithm works on QImode, no epilogue is needed.
18233 For small sizes just don't align anything. */
18234 if (size_needed == 1)
18235 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18242 label = gen_label_rtx ();
18243 emit_cmp_and_jump_insns (count_exp,
18244 GEN_INT (epilogue_size_needed),
18245 LTU, 0, counter_mode (count_exp), 1, label);
18246 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18247 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18249 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Dynamic size check: large blocks take the library call.  */
18252 if (dynamic_check != -1)
18254 rtx hot_label = gen_label_rtx ();
18255 jump_around_label = gen_label_rtx ();
18256 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18257 LEU, 0, counter_mode (count_exp), 1, hot_label);
18258 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18259 set_storage_via_libcall (dst, count_exp, val_exp, false);
18260 emit_jump (jump_around_label);
18261 emit_label (hot_label);
18264 /* Step 2: Alignment prologue. */
18266 /* Do the expensive promotion once we branched off the small blocks. */
18268 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18269 desired_align, align);
18270 gcc_assert (desired_align >= 1 && align >= 1);
18272 if (desired_align > align)
18274 if (align_bytes == 0)
18276 /* Except for the first move in epilogue, we no longer know
18277 constant offset in aliasing info. It don't seems to worth
18278 the pain to maintain it for the first move, so throw away
18280 dst = change_address (dst, BLKmode, destreg);
18281 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18286 /* If we know how many bytes need to be stored before dst is
18287 sufficiently aligned, maintain aliasing info accurately. */
18288 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18289 desired_align, align_bytes);
18290 count_exp = plus_constant (count_exp, -align_bytes);
18291 count -= align_bytes;
18293 if (need_zero_guard
18294 && (count < (unsigned HOST_WIDE_INT) size_needed
18295 || (align_bytes == 0
18296 && count < ((unsigned HOST_WIDE_INT) size_needed
18297 + desired_align - align))))
18299 /* It is possible that we copied enough so the main loop will not
18301 gcc_assert (size_needed > 1);
18302 if (label == NULL_RTX)
18303 label = gen_label_rtx ();
18304 emit_cmp_and_jump_insns (count_exp,
18305 GEN_INT (size_needed),
18306 LTU, 0, counter_mode (count_exp), 1, label);
18307 if (expected_size == -1
18308 || expected_size < (desired_align - align) / 2 + size_needed)
18309 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18311 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18314 if (label && size_needed == 1)
18316 emit_label (label);
18317 LABEL_NUSES (label) = 1;
/* Byte-sized epilogue can use the unpromoted value directly.  */
18319 promoted_val = val_exp;
18320 epilogue_size_needed = 1;
18322 else if (label == NULL_RTX)
18323 epilogue_size_needed = size_needed;
18325 /* Step 3: Main loop. */
18331 gcc_unreachable ();
18333 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18334 count_exp, QImode, 1, expected_size);
18337 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18338 count_exp, Pmode, 1, expected_size);
18340 case unrolled_loop:
18341 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18342 count_exp, Pmode, 4, expected_size);
18344 case rep_prefix_8_byte:
18345 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18348 case rep_prefix_4_byte:
18349 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18352 case rep_prefix_1_byte:
18353 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18357 /* Adjust properly the offset of src and dest memory for aliasing. */
18358 if (CONST_INT_P (count_exp))
18359 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18360 (count / size_needed) * size_needed);
18362 dst = change_address (dst, BLKmode, destreg);
18364 /* Step 4: Epilogue to copy the remaining bytes. */
18368 /* When the main loop is done, COUNT_EXP might hold original count,
18369 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18370 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18371 bytes. Compensate if needed. */
18373 if (size_needed < epilogue_size_needed)
18376 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18377 GEN_INT (size_needed - 1), count_exp, 1,
18379 if (tmp != count_exp)
18380 emit_move_insn (count_exp, tmp);
18382 emit_label (label);
18383 LABEL_NUSES (label) = 1;
18386 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18388 if (force_loopy_epilogue)
18389 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18390 epilogue_size_needed);
18392 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18393 epilogue_size_needed);
18395 if (jump_around_label)
18396 emit_label (jump_around_label);
18400 /* Expand the appropriate insns for doing strlen if not just doing
18403 out = result, initialized with the start address
18404 align_rtx = alignment of the address.
18405 scratch = scratch register, initialized with the startaddress when
18406 not aligned, otherwise undefined
18408 This is just the body. It needs the initializations mentioned above and
18409 some address computing at the end. These things are done in i386.md. */
/* On return OUT holds the address of the terminating zero byte (the
   "Expand strlen" caller subtracts the start address to get the
   length — see ix86_expand_strlen below).  */
18412 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18416 rtx align_2_label = NULL_RTX;
18417 rtx align_3_label = NULL_RTX;
18418 rtx align_4_label = gen_label_rtx ();
18419 rtx end_0_label = gen_label_rtx ();
18421 rtx tmpreg = gen_reg_rtx (SImode);
18422 rtx scratch = gen_reg_rtx (SImode);
18426 if (CONST_INT_P (align_rtx))
18427 align = INTVAL (align_rtx);
18429 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18431 /* Is there a known alignment and is it less than 4? */
18434 rtx scratch1 = gen_reg_rtx (Pmode);
18435 emit_move_insn (scratch1, out);
18436 /* Is there a known alignment and is it not 2? */
18439 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18440 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18442 /* Leave just the 3 lower bits. */
18443 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18444 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already aligned, 2 -> need two
   byte checks, >2 -> need one byte check.  */
18446 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18447 Pmode, 1, align_4_label);
18448 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18449 Pmode, 1, align_2_label);
18450 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18451 Pmode, 1, align_3_label);
18455 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18456 check if is aligned to 4 - byte. */
18458 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18459 NULL_RTX, 0, OPTAB_WIDEN);
18461 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18462 Pmode, 1, align_4_label);
18465 mem = change_address (src, QImode, out);
18467 /* Now compare the bytes. */
18469 /* Compare the first n unaligned byte on a byte per byte basis. */
18470 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18471 QImode, 1, end_0_label);
18473 /* Increment the address. */
18474 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18476 /* Not needed with an alignment of 2 */
18479 emit_label (align_2_label);
18481 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18484 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18486 emit_label (align_3_label);
18489 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18492 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18495 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18496 align this loop. It gives only huge programs, but does not help to
18498 emit_label (align_4_label);
18500 mem = change_address (src, SImode, out);
18501 emit_move_insn (scratch, mem);
18502 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18504 /* This formula yields a nonzero result iff one of the bytes is zero.
18505 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (x - 0x01010101) & ~x & 0x80808080.  */
18507 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18508 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18509 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18510 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18511 gen_int_mode (0x80808080, SImode)));
18512 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free variant: locate the zero byte with conditional moves
   (NOTE(review): the guard selecting this path is elided here,
   presumably a TARGET_CMOVE check — confirm).  */
18517 rtx reg = gen_reg_rtx (SImode);
18518 rtx reg2 = gen_reg_rtx (Pmode);
18519 emit_move_insn (reg, tmpreg);
18520 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18522 /* If zero is not in the first two bytes, move two bytes forward. */
18523 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18524 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18525 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18526 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18527 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18530 /* Emit lea manually to avoid clobbering of flags. */
18531 emit_insn (gen_rtx_SET (SImode, reg2,
18532 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18534 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18535 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18536 emit_insn (gen_rtx_SET (VOIDmode, out,
18537 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant used when conditional moves are unavailable.  */
18544 rtx end_2_label = gen_label_rtx ();
18545 /* Is zero in the first two bytes? */
18547 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18548 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18549 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18550 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18551 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18553 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18554 JUMP_LABEL (tmp) = end_2_label;
18556 /* Not in the first two. Move two bytes forward. */
18557 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18558 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18560 emit_label (end_2_label);
18564 /* Avoid branch in fixing the byte. */
18565 tmpreg = gen_lowpart (QImode, tmpreg);
18566 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18567 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18568 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18570 emit_label (end_0_label);
18573 /* Expand strlen. */
/* OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator (only const0_rtx enables the unrolled path); ALIGN is the
   known alignment.  Chooses between the unrolled SImode scanner above
   and a repnz-scas sequence.
   NOTE(review): the return-type line and some braces are elided in
   this view; the visible failure path checks fixed EAX/ECX/EDI.  */
18576 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18578 rtx addr, scratch1, scratch2, scratch3, scratch4;
18580 /* The generic case of strlen expander is long. Avoid it's
18581 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
18583 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18584 && !TARGET_INLINE_ALL_STRINGOPS
18585 && !optimize_insn_for_size_p ()
18586 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18589 addr = force_reg (Pmode, XEXP (src, 0));
18590 scratch1 = gen_reg_rtx (Pmode);
18592 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18593 && !optimize_insn_for_size_p ())
18595 /* Well it seems that some optimizer does not combine a call like
18596 foo(strlen(bar), strlen(bar));
18597 when the move and the subtraction is done here. It does calculate
18598 the length just once when these instructions are done inside of
18599 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
18600 often used and I use one fewer register for the lifetime of
18601 output_strlen_unroll() this is better. */
18603 emit_move_insn (out, addr);
18605 ix86_expand_strlensi_unroll_1 (out, src, align);
18607 /* strlensi_unroll_1 returns the address of the zero at the end of
18608 the string, like memchr(), so compute the length by subtracting
18609 the start address. */
18610 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* repnz-scas path: needs EAX (char), ECX (count), EDI (pointer).  */
18616 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18617 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18620 scratch2 = gen_reg_rtx (Pmode);
18621 scratch3 = gen_reg_rtx (Pmode);
/* -1 count: scan until the terminator is found.  */
18622 scratch4 = force_reg (Pmode, constm1_rtx);
18624 emit_move_insn (scratch3, addr);
18625 eoschar = force_reg (QImode, eoschar);
18627 src = replace_equiv_address_nv (src, scratch3);
18629 /* If .md starts supporting :P, this can be done in .md. */
18630 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18631 scratch4), UNSPEC_SCAS);
18632 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len + 2) style residue; NOT then add -1 yields len.  */
18633 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18634 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18639 /* For given symbol (function) construct code to compute address of it's PLT
18640 entry in large x86-64 PIC model. */
/* Emits: tmp = PLTOFF(symbol); tmp += PIC register.  Only valid in
   CM_LARGE_PIC (asserted).  Returns TMP (return statement elided in
   this view).  */
18642 construct_plt_address (rtx symbol)
18644 rtx tmp = gen_reg_rtx (Pmode);
18645 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18647 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18648 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18650 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18651 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) to FNADDR.  RETVAL, if non-null, receives
   the result; CALLARG1 is the argument-bytes rtx for the CALL rtx;
   POP is the callee-pop amount (const0_rtx means none); SIBCALL is
   nonzero for tail calls.
   NOTE(review): a CALLARG2 parameter is referenced below but its
   declaration line is elided in this view.  */
18656 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18658 rtx pop, int sibcall)
18660 rtx use = NULL, call;
18662 if (pop == const0_rtx)
/* 64-bit ABI never uses callee-pop.  */
18664 gcc_assert (!TARGET_64BIT || !pop);
18666 if (TARGET_MACHO && !TARGET_64BIT)
18669 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18670 fnaddr = machopic_indirect_call_target (fnaddr);
18675 /* Static functions and indirect calls don't need the pic register. */
18676 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18677 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18678 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18679 use_reg (&use, pic_offset_table_rtx);
/* Varargs calls on 64-bit pass the number of SSE registers used
   in AL (negative CALLARG2 means "not a varargs call").  */
18682 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18684 rtx al = gen_rtx_REG (QImode, AX_REG);
18685 emit_move_insn (al, callarg2);
18686 use_reg (&use, al);
/* Large PIC model: route non-local calls through the PLT address
   computed by construct_plt_address.  */
18689 if (ix86_cmodel == CM_LARGE_PIC
18690 && GET_CODE (fnaddr) == MEM
18691 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18692 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18693 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18694 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18696 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18697 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses must go through R11,
   a register not used for argument passing.  */
18699 if (sibcall && TARGET_64BIT
18700 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18703 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18704 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18705 emit_move_insn (fnaddr, addr);
18706 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18709 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18711 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: bundle the stack adjustment with the call.  */
18714 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18715 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18716 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
/* MS ABI caller into SysV callee: record the extra clobbers.  */
18719 && ix86_cfun_abi () == MS_ABI
18720 && (!callarg2 || INTVAL (callarg2) != -2)
18722 /* We need to represent that SI and DI registers are clobbered
18724 static int clobbered_registers[] = {
18725 XMM6_REG, XMM7_REG, XMM8_REG,
18726 XMM9_REG, XMM10_REG, XMM11_REG,
18727 XMM12_REG, XMM13_REG, XMM14_REG,
18728 XMM15_REG, SI_REG, DI_REG
18731 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18732 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18733 UNSPEC_MS_TO_SYSV_CALL);
18737 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18738 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18741 (SSE_REGNO_P (clobbered_registers[i])
18743 clobbered_registers[i]));
18745 call = gen_rtx_PARALLEL (VOIDmode,
18746 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18750 call = emit_call_insn (call);
18752 CALL_INSN_FUNCTION_USAGE (call) = use;
18756 /* Clear stack slot assignments remembered from previous functions.
18757 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* NOTE(review): decimated listing -- the embedded source line numbers jump,
   so the function's braces and trailing `return f;` are elided here.  */
18760 static struct machine_function *
18761 ix86_init_machine_status (void)
18763 struct machine_function *f;
/* GGC_CNEW presumably allocates cleared GC-managed storage (the C in
   CNEW) -- verify; fields not set below would then start as 0/NULL.  */
18765 f = GGC_CNEW (struct machine_function);
/* -1 appears to be a "not computed yet" sentinel for the cached
   register count -- TODO confirm against its consumer.  */
18766 f->use_fast_prologue_epilogue_nregs = -1;
18767 f->tls_descriptor_call_expanded_p = 0;
/* Record the calling-convention ABI in effect for this function.  */
18768 f->call_abi = ix86_abi;
18773 /* Return a MEM corresponding to a stack slot with mode MODE.
18774 Allocate a new slot if necessary.
18776 The RTL for a function can have several slots available: N is
18777 which slot to use. */
/* NOTE(review): decimated listing -- return type, braces and the final
   `return` of the allocation path are elided.  */
18780 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18782 struct stack_local_entry *s;
18784 gcc_assert (n < MAX_386_STACK_LOCALS);
18786 /* Virtual slot is valid only before vregs are instantiated. */
18787 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously created slot with the same (mode, n) key.  */
18789 for (s = ix86_stack_locals; s; s = s->next)
18790 if (s->mode == mode && s->n == n)
/* copy_rtx so each use gets unshared RTL.  */
18791 return copy_rtx (s->rtl);
/* Cache miss: allocate a new entry in GC-managed memory ...  */
18793 s = (struct stack_local_entry *)
18794 ggc_alloc (sizeof (struct stack_local_entry));
18797 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* ... and push it onto the per-function cache list.  */
18799 s->next = ix86_stack_locals;
18800 ix86_stack_locals = s;
18804 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* GTY(()) marks the cached rtx as a garbage-collection root.  */
18806 static GTY(()) rtx ix86_tls_symbol;
18808 ix86_tls_get_addr (void)
/* Build the SYMBOL_REF lazily on first use and cache it.  */
18811 if (!ix86_tls_symbol)
/* NOTE(review): decimated listing -- an additional condition line of the
   TARGET_ANY_GNU_TLS test (original line 18815) is elided here.  */
18813 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18814 (TARGET_ANY_GNU_TLS
18816 ? "___tls_get_addr"
18817 : "__tls_get_addr");
18820 return ix86_tls_symbol;
18823 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* GTY(()) marks the cached rtx as a garbage-collection root.  */
18825 static GTY(()) rtx ix86_tls_module_base_symbol;
18827 ix86_tls_module_base (void)
/* Build the SYMBOL_REF lazily on first use and cache it.  */
18830 if (!ix86_tls_module_base_symbol)
18832 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18833 "_TLS_MODULE_BASE_");
/* Tag the symbol with the global-dynamic TLS model in its flags.  */
18834 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18835 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18838 return ix86_tls_module_base_symbol;
18841 /* Calculate the length of the memory address in the instruction
18842 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): decimated listing -- the return type, `len` bookkeeping
   and several return statements are elided between the visible lines.  */
18845 memory_address_length (rtx addr)
18847 struct ix86_address parts;
18848 rtx base, index, disp;
/* Auto-inc/dec addresses have no displacement/SIB bytes to count.  */
18852 if (GET_CODE (addr) == PRE_DEC
18853 || GET_CODE (addr) == POST_INC
18854 || GET_CODE (addr) == PRE_MODIFY
18855 || GET_CODE (addr) == POST_MODIFY)
18858 ok = ix86_decompose_address (addr, &parts);
18861 if (parts.base && GET_CODE (parts.base) == SUBREG)
18862 parts.base = SUBREG_REG (parts.base);
18863 if (parts.index && GET_CODE (parts.index) == SUBREG)
18864 parts.index = SUBREG_REG (parts.index);
18867 index = parts.index;
18872 - esp as the base always wants an index,
18873 - ebp as the base always wants a displacement. */
18875 /* Register Indirect. */
18876 if (base && !index && !disp)
18878 /* esp (for its index) and ebp (for its displacement) need
18879 the two-byte modrm form. */
18880 if (addr == stack_pointer_rtx
18881 || addr == arg_pointer_rtx
18882 || addr == frame_pointer_rtx
18883 || addr == hard_frame_pointer_rtx)
18887 /* Direct Addressing. */
18888 else if (disp && !base && !index)
18893 /* Find the length of the displacement constant. */
/* constraint K = signed 8-bit immediate, so a base+disp8 form applies.  */
18896 if (base && satisfies_constraint_K (disp))
18901 /* ebp always wants a displacement. */
18902 else if (base == hard_frame_pointer_rtx)
18905 /* An index requires the two-byte modrm form.... */
18907 /* ...like esp, which always wants an index. */
18908 || base == stack_pointer_rtx
18909 || base == arg_pointer_rtx
18910 || base == frame_pointer_rtx)
18917 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18918 is set, expect that insn have 8bit immediate alternative. */
/* NOTE(review): decimated listing -- return type, loop body braces and
   the switch's case arms are elided between the visible lines.  */
18920 ix86_attr_length_immediate_default (rtx insn, int shortform)
18924 extract_insn_cached (insn);
/* Scan the recognized operands for a constant immediate.  */
18925 for (i = recog_data.n_operands - 1; i >= 0; --i)
18926 if (CONSTANT_P (recog_data.operand[i]))
/* constraint K = signed 8-bit immediate, eligible for the short form.  */
18929 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate size follows the insn's mode attribute.  */
18933 switch (get_attr_mode (insn))
18944 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18949 fatal_insn ("unknown insn mode", insn);
18955 /* Compute default value for "length_address" attribute. */
/* NOTE(review): decimated listing -- return type, braces and the final
   fallback return are elided between the visible lines.  */
18957 ix86_attr_length_address_default (rtx insn)
/* LEA encodes its "memory" operand in SET_SRC rather than a MEM.  */
18961 if (get_attr_type (insn) == TYPE_LEA)
18963 rtx set = PATTERN (insn);
18965 if (GET_CODE (set) == PARALLEL)
18966 set = XVECEXP (set, 0, 0);
18968 gcc_assert (GET_CODE (set) == SET);
18970 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand's address, if any.  */
18973 extract_insn_cached (insn);
18974 for (i = recog_data.n_operands - 1; i >= 0; --i)
18975 if (MEM_P (recog_data.operand[i]))
18977 return memory_address_length (XEXP (recog_data.operand[i], 0));
18983 /* Compute default value for "length_vex" attribute. It includes
18984 2 or 3 byte VEX prefix and 1 opcode byte. */
/* NOTE(review): decimated listing -- return type, a parameter line of the
   signature (has_vex_w), braces and the return statements are elided.  */
18987 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18992 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18993 byte VEX prefix. */
18994 if (!has_0f_opcode || has_vex_w)
18997 /* We can always use 2 byte VEX prefix in 32bit. */
19001 extract_insn_cached (insn);
/* In 64-bit mode, scan operands for conditions forcing the 3-byte form.  */
19003 for (i = recog_data.n_operands - 1; i >= 0; --i)
19004 if (REG_P (recog_data.operand[i]))
19006 /* REX.W bit uses 3 byte VEX prefix. */
19007 if (GET_MODE (recog_data.operand[i]) == DImode)
19012 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19013 if (MEM_P (recog_data.operand[i])
19014 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19021 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): decimated listing -- return type, the switch head (which
   presumably dispatches on ix86_tune -- verify), the per-case return
   values and the default case are elided between the visible lines.  */
19024 ix86_issue_rate (void)
19028 case PROCESSOR_PENTIUM:
19032 case PROCESSOR_PENTIUMPRO:
19033 case PROCESSOR_PENTIUM4:
19034 case PROCESSOR_ATHLON:
19036 case PROCESSOR_AMDFAM10:
19037 case PROCESSOR_NOCONA:
19038 case PROCESSOR_GENERIC32:
19039 case PROCESSOR_GENERIC64:
19042 case PROCESSOR_CORE2:
19050 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19051 by DEP_INSN and nothing set by DEP_INSN. */
/* NOTE(review): decimated listing -- return type, braces, `set2 = NULL_RTX`
   for the single-set path, and the return statements are elided.  */
19054 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19058 /* Simplify the test for uninteresting insns. */
19059 if (insn_type != TYPE_SETCC
19060 && insn_type != TYPE_ICMOV
19061 && insn_type != TYPE_FCMOV
19062 && insn_type != TYPE_IBR)
19065 if ((set = single_set (dep_insn)) != 0)
19067 set = SET_DEST (set);
/* Also handle a two-SET PARALLEL (e.g. an arithmetic op that sets both
   a result register and the flags).  */
19070 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19071 && XVECLEN (PATTERN (dep_insn), 0) == 2
19072 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19073 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19075 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* BUGFIX: set2 must come from PARALLEL element 1, not element 0 -- the
   guard above checks both elements, and with index 0 the set2 overlap
   test below merely repeated the set test, so the second destination
   of the parallel was never examined.  */
19076 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19081 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19084 /* This test is true if the dependent insn reads the flags but
19085 not any other potentially set register. */
19086 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19089 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19095 /* Return true iff USE_INSN has a memory address with operands set by
/* NOTE(review): decimated listing -- the rest of this comment, the return
   type, braces and the final `return false` are elided.  */
19099 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19102 extract_insn_cached (use_insn);
/* Only the first MEM operand found is checked; its address feeds the
   Address Generation Interlock test in ix86_adjust_cost.  */
19103 for (i = recog_data.n_operands - 1; i >= 0; --i)
19104 if (MEM_P (recog_data.operand[i]))
19106 rtx addr = XEXP (recog_data.operand[i], 0);
19107 return modified_in_p (addr, set_insn) != 0;
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.
   NOTE(review): decimated listing -- return type, braces, the switch head
   (presumably on ix86_tune -- verify), cost assignments and the final
   return are elided between the visible lines.  */
19113 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19115 enum attr_type insn_type, dep_insn_type;
19116 enum attr_memory memory;
19118 int dep_insn_code_number;
19120 /* Anti and output dependencies have zero cost on all CPUs. */
19121 if (REG_NOTE_KIND (link) != 0)
19124 dep_insn_code_number = recog_memoized (dep_insn);
19126 /* If we can't recognize the insns, we can't really do anything. */
19127 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19130 insn_type = get_attr_type (insn);
19131 dep_insn_type = get_attr_type (dep_insn);
19135 case PROCESSOR_PENTIUM:
19136 /* Address Generation Interlock adds a cycle of latency. */
19137 if (insn_type == TYPE_LEA)
19139 rtx addr = PATTERN (insn);
19141 if (GET_CODE (addr) == PARALLEL)
19142 addr = XVECEXP (addr, 0, 0);
19144 gcc_assert (GET_CODE (addr) == SET);
19146 addr = SET_SRC (addr);
19147 if (modified_in_p (addr, dep_insn))
19150 else if (ix86_agi_dependent (dep_insn, insn))
19153 /* ??? Compares pair with jump/setcc. */
19154 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19157 /* Floating point stores require value to be ready one cycle earlier. */
19158 if (insn_type == TYPE_FMOV
19159 && get_attr_memory (insn) == MEMORY_STORE
19160 && !ix86_agi_dependent (dep_insn, insn))
19164 case PROCESSOR_PENTIUMPRO:
19165 memory = get_attr_memory (insn);
19167 /* INT->FP conversion is expensive. */
19168 if (get_attr_fp_int_src (dep_insn))
19171 /* There is one cycle extra latency between an FP op and a store. */
19172 if (insn_type == TYPE_FMOV
19173 && (set = single_set (dep_insn)) != NULL_RTX
19174 && (set2 = single_set (insn)) != NULL_RTX
19175 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19176 && MEM_P (SET_DEST (set2)))
19179 /* Show ability of reorder buffer to hide latency of load by executing
19180 in parallel with previous instruction in case
19181 previous instruction is not needed to compute the address. */
19182 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19183 && !ix86_agi_dependent (dep_insn, insn))
19185 /* Claim moves to take one cycle, as core can issue one load
19186 at time and the next load can start cycle later. */
19187 if (dep_insn_type == TYPE_IMOV
19188 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for this next arm (between original lines
   19188 and 19196) is elided by the decimation.  */
19196 memory = get_attr_memory (insn);
19198 /* The esp dependency is resolved before the instruction is really
19200 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19201 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19204 /* INT->FP conversion is expensive. */
19205 if (get_attr_fp_int_src (dep_insn))
19208 /* Show ability of reorder buffer to hide latency of load by executing
19209 in parallel with previous instruction in case
19210 previous instruction is not needed to compute the address. */
19211 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19212 && !ix86_agi_dependent (dep_insn, insn))
19214 /* Claim moves to take one cycle, as core can issue one load
19215 at time and the next load can start cycle later. */
19216 if (dep_insn_type == TYPE_IMOV
19217 || dep_insn_type == TYPE_FMOV)
19226 case PROCESSOR_ATHLON:
19228 case PROCESSOR_AMDFAM10:
19229 case PROCESSOR_GENERIC32:
19230 case PROCESSOR_GENERIC64:
19231 memory = get_attr_memory (insn);
19233 /* Show ability of reorder buffer to hide latency of load by executing
19234 in parallel with previous instruction in case
19235 previous instruction is not needed to compute the address. */
19236 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19237 && !ix86_agi_dependent (dep_insn, insn))
19239 enum attr_unit unit = get_attr_unit (insn);
19242 /* Because of the difference between the length of integer and
19243 floating unit pipeline preparation stages, the memory operands
19244 for floating point are cheaper.
19246 ??? For Athlon it the difference is most probably 2. */
19247 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19250 loadcost = TARGET_ATHLON ? 2 : 0;
/* Hide up to `loadcost` cycles of load latency, but never go negative.  */
19252 if (cost >= loadcost)
19265 /* How many alternative schedules to try. This should be as wide as the
19266 scheduling freedom in the DFA, but no wider. Making this value too
19267 large results extra work for the scheduler. */
/* NOTE(review): decimated listing -- return type, the switch head and the
   per-case/default return values are elided.  */
19270 ia32_multipass_dfa_lookahead (void)
19274 case PROCESSOR_PENTIUM:
19277 case PROCESSOR_PENTIUMPRO:
19287 /* Compute the alignment given to a constant that is being placed in memory.
19288 EXP is the constant and ALIGN is the alignment that the object would
19290 The value of this function is used instead of that alignment to align
/* NOTE(review): decimated listing -- return type, braces, the boosted
   return values (64 / 128) and the final `return align` are elided.  */
19294 ix86_constant_alignment (tree exp, int align)
19296 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19297 || TREE_CODE (exp) == INTEGER_CST)
/* Boost doubles to 64-bit and 128-bit-mode constants to 128-bit.  */
19299 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19301 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align longer string constants unless optimizing for size.  */
19304 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19305 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19306 return BITS_PER_WORD;
19311 /* Compute the alignment for a static variable.
19312 TYPE is the data type, and ALIGN is the alignment that
19313 the object would ordinarily have. The value of this function is used
19314 instead of that alignment to align the object. */
/* NOTE(review): decimated listing -- return type, braces, return values
   and the TARGET_64BIT guard around the ABI clause are elided.  */
19317 ix86_data_alignment (tree type, int align)
19319 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates whose size is a known constant get max_align; the
   HIGH word being nonzero means the size exceeds the LOW-word range.  */
19321 if (AGGREGATE_TYPE_P (type)
19322 && TYPE_SIZE (type)
19323 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19324 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19325 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19326 && align < max_align)
19329 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19330 to 16byte boundary. */
19333 if (AGGREGATE_TYPE_P (type)
19334 && TYPE_SIZE (type)
19335 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19336 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19337 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-kind boosts: doubles to 64, 128-bit modes to 128.  */
19341 if (TREE_CODE (type) == ARRAY_TYPE)
19343 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19345 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19348 else if (TREE_CODE (type) == COMPLEX_TYPE)
19351 if (TYPE_MODE (type) == DCmode && align < 64)
19353 if ((TYPE_MODE (type) == XCmode
19354 || TYPE_MODE (type) == TCmode) && align < 128)
/* For records/unions, only the first field's mode is consulted.  */
19357 else if ((TREE_CODE (type) == RECORD_TYPE
19358 || TREE_CODE (type) == UNION_TYPE
19359 || TREE_CODE (type) == QUAL_UNION_TYPE)
19360 && TYPE_FIELDS (type))
19362 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19364 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19367 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19368 || TREE_CODE (type) == INTEGER_TYPE)
19370 if (TYPE_MODE (type) == DFmode && align < 64)
19372 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19379 /* Compute the alignment for a local variable or a stack slot. EXP is
19380 the data type or decl itself, MODE is the widest mode available and
19381 ALIGN is the alignment that the object would ordinarily have. The
19382 value of this macro is used instead of that alignment to align the
/* NOTE(review): decimated listing -- return type, braces, the `decl`
   split of EXP, return values, and the condition head of the
   stack-realignment clause are elided.  */
19386 ix86_local_alignment (tree exp, enum machine_mode mode,
19387 unsigned int align)
19391 if (exp && DECL_P (exp))
19393 type = TREE_TYPE (exp);
19402 /* Don't do dynamic stack realignment for long long objects with
19403 -mpreferred-stack-boundary=2. */
19406 && ix86_preferred_stack_boundary < 64
19407 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19408 && (!type || !TYPE_USER_ALIGN (type))
19409 && (!decl || !DECL_USER_ALIGN (decl)))
19412 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19413 register in MODE. We will return the largest alignment of XF
19417 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19418 align = GET_MODE_ALIGNMENT (DFmode);
19422 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19423 to 16byte boundary. */
/* Size is in bytes here (>= 16), unlike ix86_data_alignment's bits.  */
19426 if (AGGREGATE_TYPE_P (type)
19427 && TYPE_SIZE (type)
19428 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19429 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19430 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-kind boosts mirroring ix86_data_alignment.  */
19433 if (TREE_CODE (type) == ARRAY_TYPE)
19435 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19437 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19440 else if (TREE_CODE (type) == COMPLEX_TYPE)
19442 if (TYPE_MODE (type) == DCmode && align < 64)
19444 if ((TYPE_MODE (type) == XCmode
19445 || TYPE_MODE (type) == TCmode) && align < 128)
19448 else if ((TREE_CODE (type) == RECORD_TYPE
19449 || TREE_CODE (type) == UNION_TYPE
19450 || TREE_CODE (type) == QUAL_UNION_TYPE)
19451 && TYPE_FIELDS (type))
19453 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19455 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19458 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19459 || TREE_CODE (type) == INTEGER_TYPE)
19462 if (TYPE_MODE (type) == DFmode && align < 64)
19464 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19470 /* Emit RTL insns to initialize the variable parts of a trampoline.
19471 FNADDR is an RTX for the address of the function's pure code.
19472 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): decimated listing -- return type, braces, the
   TARGET_64BIT branch split, `offset` init/increments and some argument
   lines are elided between the visible lines.  */
19474 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: `movl $cxt, %ecx; jmp rel32` -- 0xb9 is the
   mov-imm32-to-ecx opcode, 0xe9 the rel32 jump opcode.  */
19478 /* Compute offset from the end of the jmp to the target function. */
19479 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19480 plus_constant (tramp, 10),
19481 NULL_RTX, 1, OPTAB_DIRECT);
19482 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19483 gen_int_mode (0xb9, QImode));
19484 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19485 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19486 gen_int_mode (0xe9, QImode));
19487 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19492 /* Try to load address using shorter movl instead of movabs.
19493 We may want to support movq for kernel mode, but kernel does not use
19494 trampolines at the moment. */
/* 64-bit path: zero-extendable address fits the 6-byte 41 bb imm32 form
   (movl into r11); copy_to_mode_reg so gen_lowpart below is valid.  */
19495 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19497 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19498 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19499 gen_int_mode (0xbb41, HImode));
19500 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19501 gen_lowpart (SImode, fnaddr));
/* Otherwise the full 10-byte 49 bb imm64 (movabs into r11).  */
19506 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19507 gen_int_mode (0xbb49, HImode));
19508 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19512 /* Load static chain using movabs to r10. */
19513 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19514 gen_int_mode (0xba49, HImode));
19515 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19518 /* Jump to the r11 */
19519 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19520 gen_int_mode (0xff49, HImode));
19521 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19522 gen_int_mode (0xe3, QImode));
19524 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets needing it, mark the trampoline's stack page executable.  */
19527 #ifdef ENABLE_EXECUTE_STACK
19528 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19529 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19533 /* Codes for all the SSE/MMX builtins. */
19536 IX86_BUILTIN_ADDPS,
19537 IX86_BUILTIN_ADDSS,
19538 IX86_BUILTIN_DIVPS,
19539 IX86_BUILTIN_DIVSS,
19540 IX86_BUILTIN_MULPS,
19541 IX86_BUILTIN_MULSS,
19542 IX86_BUILTIN_SUBPS,
19543 IX86_BUILTIN_SUBSS,
19545 IX86_BUILTIN_CMPEQPS,
19546 IX86_BUILTIN_CMPLTPS,
19547 IX86_BUILTIN_CMPLEPS,
19548 IX86_BUILTIN_CMPGTPS,
19549 IX86_BUILTIN_CMPGEPS,
19550 IX86_BUILTIN_CMPNEQPS,
19551 IX86_BUILTIN_CMPNLTPS,
19552 IX86_BUILTIN_CMPNLEPS,
19553 IX86_BUILTIN_CMPNGTPS,
19554 IX86_BUILTIN_CMPNGEPS,
19555 IX86_BUILTIN_CMPORDPS,
19556 IX86_BUILTIN_CMPUNORDPS,
19557 IX86_BUILTIN_CMPEQSS,
19558 IX86_BUILTIN_CMPLTSS,
19559 IX86_BUILTIN_CMPLESS,
19560 IX86_BUILTIN_CMPNEQSS,
19561 IX86_BUILTIN_CMPNLTSS,
19562 IX86_BUILTIN_CMPNLESS,
19563 IX86_BUILTIN_CMPNGTSS,
19564 IX86_BUILTIN_CMPNGESS,
19565 IX86_BUILTIN_CMPORDSS,
19566 IX86_BUILTIN_CMPUNORDSS,
19568 IX86_BUILTIN_COMIEQSS,
19569 IX86_BUILTIN_COMILTSS,
19570 IX86_BUILTIN_COMILESS,
19571 IX86_BUILTIN_COMIGTSS,
19572 IX86_BUILTIN_COMIGESS,
19573 IX86_BUILTIN_COMINEQSS,
19574 IX86_BUILTIN_UCOMIEQSS,
19575 IX86_BUILTIN_UCOMILTSS,
19576 IX86_BUILTIN_UCOMILESS,
19577 IX86_BUILTIN_UCOMIGTSS,
19578 IX86_BUILTIN_UCOMIGESS,
19579 IX86_BUILTIN_UCOMINEQSS,
19581 IX86_BUILTIN_CVTPI2PS,
19582 IX86_BUILTIN_CVTPS2PI,
19583 IX86_BUILTIN_CVTSI2SS,
19584 IX86_BUILTIN_CVTSI642SS,
19585 IX86_BUILTIN_CVTSS2SI,
19586 IX86_BUILTIN_CVTSS2SI64,
19587 IX86_BUILTIN_CVTTPS2PI,
19588 IX86_BUILTIN_CVTTSS2SI,
19589 IX86_BUILTIN_CVTTSS2SI64,
19591 IX86_BUILTIN_MAXPS,
19592 IX86_BUILTIN_MAXSS,
19593 IX86_BUILTIN_MINPS,
19594 IX86_BUILTIN_MINSS,
19596 IX86_BUILTIN_LOADUPS,
19597 IX86_BUILTIN_STOREUPS,
19598 IX86_BUILTIN_MOVSS,
19600 IX86_BUILTIN_MOVHLPS,
19601 IX86_BUILTIN_MOVLHPS,
19602 IX86_BUILTIN_LOADHPS,
19603 IX86_BUILTIN_LOADLPS,
19604 IX86_BUILTIN_STOREHPS,
19605 IX86_BUILTIN_STORELPS,
19607 IX86_BUILTIN_MASKMOVQ,
19608 IX86_BUILTIN_MOVMSKPS,
19609 IX86_BUILTIN_PMOVMSKB,
19611 IX86_BUILTIN_MOVNTPS,
19612 IX86_BUILTIN_MOVNTQ,
19614 IX86_BUILTIN_LOADDQU,
19615 IX86_BUILTIN_STOREDQU,
19617 IX86_BUILTIN_PACKSSWB,
19618 IX86_BUILTIN_PACKSSDW,
19619 IX86_BUILTIN_PACKUSWB,
19621 IX86_BUILTIN_PADDB,
19622 IX86_BUILTIN_PADDW,
19623 IX86_BUILTIN_PADDD,
19624 IX86_BUILTIN_PADDQ,
19625 IX86_BUILTIN_PADDSB,
19626 IX86_BUILTIN_PADDSW,
19627 IX86_BUILTIN_PADDUSB,
19628 IX86_BUILTIN_PADDUSW,
19629 IX86_BUILTIN_PSUBB,
19630 IX86_BUILTIN_PSUBW,
19631 IX86_BUILTIN_PSUBD,
19632 IX86_BUILTIN_PSUBQ,
19633 IX86_BUILTIN_PSUBSB,
19634 IX86_BUILTIN_PSUBSW,
19635 IX86_BUILTIN_PSUBUSB,
19636 IX86_BUILTIN_PSUBUSW,
19639 IX86_BUILTIN_PANDN,
19643 IX86_BUILTIN_PAVGB,
19644 IX86_BUILTIN_PAVGW,
19646 IX86_BUILTIN_PCMPEQB,
19647 IX86_BUILTIN_PCMPEQW,
19648 IX86_BUILTIN_PCMPEQD,
19649 IX86_BUILTIN_PCMPGTB,
19650 IX86_BUILTIN_PCMPGTW,
19651 IX86_BUILTIN_PCMPGTD,
19653 IX86_BUILTIN_PMADDWD,
19655 IX86_BUILTIN_PMAXSW,
19656 IX86_BUILTIN_PMAXUB,
19657 IX86_BUILTIN_PMINSW,
19658 IX86_BUILTIN_PMINUB,
19660 IX86_BUILTIN_PMULHUW,
19661 IX86_BUILTIN_PMULHW,
19662 IX86_BUILTIN_PMULLW,
19664 IX86_BUILTIN_PSADBW,
19665 IX86_BUILTIN_PSHUFW,
19667 IX86_BUILTIN_PSLLW,
19668 IX86_BUILTIN_PSLLD,
19669 IX86_BUILTIN_PSLLQ,
19670 IX86_BUILTIN_PSRAW,
19671 IX86_BUILTIN_PSRAD,
19672 IX86_BUILTIN_PSRLW,
19673 IX86_BUILTIN_PSRLD,
19674 IX86_BUILTIN_PSRLQ,
19675 IX86_BUILTIN_PSLLWI,
19676 IX86_BUILTIN_PSLLDI,
19677 IX86_BUILTIN_PSLLQI,
19678 IX86_BUILTIN_PSRAWI,
19679 IX86_BUILTIN_PSRADI,
19680 IX86_BUILTIN_PSRLWI,
19681 IX86_BUILTIN_PSRLDI,
19682 IX86_BUILTIN_PSRLQI,
19684 IX86_BUILTIN_PUNPCKHBW,
19685 IX86_BUILTIN_PUNPCKHWD,
19686 IX86_BUILTIN_PUNPCKHDQ,
19687 IX86_BUILTIN_PUNPCKLBW,
19688 IX86_BUILTIN_PUNPCKLWD,
19689 IX86_BUILTIN_PUNPCKLDQ,
19691 IX86_BUILTIN_SHUFPS,
19693 IX86_BUILTIN_RCPPS,
19694 IX86_BUILTIN_RCPSS,
19695 IX86_BUILTIN_RSQRTPS,
19696 IX86_BUILTIN_RSQRTPS_NR,
19697 IX86_BUILTIN_RSQRTSS,
19698 IX86_BUILTIN_RSQRTF,
19699 IX86_BUILTIN_SQRTPS,
19700 IX86_BUILTIN_SQRTPS_NR,
19701 IX86_BUILTIN_SQRTSS,
19703 IX86_BUILTIN_UNPCKHPS,
19704 IX86_BUILTIN_UNPCKLPS,
19706 IX86_BUILTIN_ANDPS,
19707 IX86_BUILTIN_ANDNPS,
19709 IX86_BUILTIN_XORPS,
19712 IX86_BUILTIN_LDMXCSR,
19713 IX86_BUILTIN_STMXCSR,
19714 IX86_BUILTIN_SFENCE,
19716 /* 3DNow! Original */
19717 IX86_BUILTIN_FEMMS,
19718 IX86_BUILTIN_PAVGUSB,
19719 IX86_BUILTIN_PF2ID,
19720 IX86_BUILTIN_PFACC,
19721 IX86_BUILTIN_PFADD,
19722 IX86_BUILTIN_PFCMPEQ,
19723 IX86_BUILTIN_PFCMPGE,
19724 IX86_BUILTIN_PFCMPGT,
19725 IX86_BUILTIN_PFMAX,
19726 IX86_BUILTIN_PFMIN,
19727 IX86_BUILTIN_PFMUL,
19728 IX86_BUILTIN_PFRCP,
19729 IX86_BUILTIN_PFRCPIT1,
19730 IX86_BUILTIN_PFRCPIT2,
19731 IX86_BUILTIN_PFRSQIT1,
19732 IX86_BUILTIN_PFRSQRT,
19733 IX86_BUILTIN_PFSUB,
19734 IX86_BUILTIN_PFSUBR,
19735 IX86_BUILTIN_PI2FD,
19736 IX86_BUILTIN_PMULHRW,
19738 /* 3DNow! Athlon Extensions */
19739 IX86_BUILTIN_PF2IW,
19740 IX86_BUILTIN_PFNACC,
19741 IX86_BUILTIN_PFPNACC,
19742 IX86_BUILTIN_PI2FW,
19743 IX86_BUILTIN_PSWAPDSI,
19744 IX86_BUILTIN_PSWAPDSF,
19747 IX86_BUILTIN_ADDPD,
19748 IX86_BUILTIN_ADDSD,
19749 IX86_BUILTIN_DIVPD,
19750 IX86_BUILTIN_DIVSD,
19751 IX86_BUILTIN_MULPD,
19752 IX86_BUILTIN_MULSD,
19753 IX86_BUILTIN_SUBPD,
19754 IX86_BUILTIN_SUBSD,
19756 IX86_BUILTIN_CMPEQPD,
19757 IX86_BUILTIN_CMPLTPD,
19758 IX86_BUILTIN_CMPLEPD,
19759 IX86_BUILTIN_CMPGTPD,
19760 IX86_BUILTIN_CMPGEPD,
19761 IX86_BUILTIN_CMPNEQPD,
19762 IX86_BUILTIN_CMPNLTPD,
19763 IX86_BUILTIN_CMPNLEPD,
19764 IX86_BUILTIN_CMPNGTPD,
19765 IX86_BUILTIN_CMPNGEPD,
19766 IX86_BUILTIN_CMPORDPD,
19767 IX86_BUILTIN_CMPUNORDPD,
19768 IX86_BUILTIN_CMPEQSD,
19769 IX86_BUILTIN_CMPLTSD,
19770 IX86_BUILTIN_CMPLESD,
19771 IX86_BUILTIN_CMPNEQSD,
19772 IX86_BUILTIN_CMPNLTSD,
19773 IX86_BUILTIN_CMPNLESD,
19774 IX86_BUILTIN_CMPORDSD,
19775 IX86_BUILTIN_CMPUNORDSD,
19777 IX86_BUILTIN_COMIEQSD,
19778 IX86_BUILTIN_COMILTSD,
19779 IX86_BUILTIN_COMILESD,
19780 IX86_BUILTIN_COMIGTSD,
19781 IX86_BUILTIN_COMIGESD,
19782 IX86_BUILTIN_COMINEQSD,
19783 IX86_BUILTIN_UCOMIEQSD,
19784 IX86_BUILTIN_UCOMILTSD,
19785 IX86_BUILTIN_UCOMILESD,
19786 IX86_BUILTIN_UCOMIGTSD,
19787 IX86_BUILTIN_UCOMIGESD,
19788 IX86_BUILTIN_UCOMINEQSD,
19790 IX86_BUILTIN_MAXPD,
19791 IX86_BUILTIN_MAXSD,
19792 IX86_BUILTIN_MINPD,
19793 IX86_BUILTIN_MINSD,
19795 IX86_BUILTIN_ANDPD,
19796 IX86_BUILTIN_ANDNPD,
19798 IX86_BUILTIN_XORPD,
19800 IX86_BUILTIN_SQRTPD,
19801 IX86_BUILTIN_SQRTSD,
19803 IX86_BUILTIN_UNPCKHPD,
19804 IX86_BUILTIN_UNPCKLPD,
19806 IX86_BUILTIN_SHUFPD,
19808 IX86_BUILTIN_LOADUPD,
19809 IX86_BUILTIN_STOREUPD,
19810 IX86_BUILTIN_MOVSD,
19812 IX86_BUILTIN_LOADHPD,
19813 IX86_BUILTIN_LOADLPD,
19815 IX86_BUILTIN_CVTDQ2PD,
19816 IX86_BUILTIN_CVTDQ2PS,
19818 IX86_BUILTIN_CVTPD2DQ,
19819 IX86_BUILTIN_CVTPD2PI,
19820 IX86_BUILTIN_CVTPD2PS,
19821 IX86_BUILTIN_CVTTPD2DQ,
19822 IX86_BUILTIN_CVTTPD2PI,
19824 IX86_BUILTIN_CVTPI2PD,
19825 IX86_BUILTIN_CVTSI2SD,
19826 IX86_BUILTIN_CVTSI642SD,
19828 IX86_BUILTIN_CVTSD2SI,
19829 IX86_BUILTIN_CVTSD2SI64,
19830 IX86_BUILTIN_CVTSD2SS,
19831 IX86_BUILTIN_CVTSS2SD,
19832 IX86_BUILTIN_CVTTSD2SI,
19833 IX86_BUILTIN_CVTTSD2SI64,
19835 IX86_BUILTIN_CVTPS2DQ,
19836 IX86_BUILTIN_CVTPS2PD,
19837 IX86_BUILTIN_CVTTPS2DQ,
19839 IX86_BUILTIN_MOVNTI,
19840 IX86_BUILTIN_MOVNTPD,
19841 IX86_BUILTIN_MOVNTDQ,
19843 IX86_BUILTIN_MOVQ128,
19846 IX86_BUILTIN_MASKMOVDQU,
19847 IX86_BUILTIN_MOVMSKPD,
19848 IX86_BUILTIN_PMOVMSKB128,
19850 IX86_BUILTIN_PACKSSWB128,
19851 IX86_BUILTIN_PACKSSDW128,
19852 IX86_BUILTIN_PACKUSWB128,
19854 IX86_BUILTIN_PADDB128,
19855 IX86_BUILTIN_PADDW128,
19856 IX86_BUILTIN_PADDD128,
19857 IX86_BUILTIN_PADDQ128,
19858 IX86_BUILTIN_PADDSB128,
19859 IX86_BUILTIN_PADDSW128,
19860 IX86_BUILTIN_PADDUSB128,
19861 IX86_BUILTIN_PADDUSW128,
19862 IX86_BUILTIN_PSUBB128,
19863 IX86_BUILTIN_PSUBW128,
19864 IX86_BUILTIN_PSUBD128,
19865 IX86_BUILTIN_PSUBQ128,
19866 IX86_BUILTIN_PSUBSB128,
19867 IX86_BUILTIN_PSUBSW128,
19868 IX86_BUILTIN_PSUBUSB128,
19869 IX86_BUILTIN_PSUBUSW128,
19871 IX86_BUILTIN_PAND128,
19872 IX86_BUILTIN_PANDN128,
19873 IX86_BUILTIN_POR128,
19874 IX86_BUILTIN_PXOR128,
19876 IX86_BUILTIN_PAVGB128,
19877 IX86_BUILTIN_PAVGW128,
19879 IX86_BUILTIN_PCMPEQB128,
19880 IX86_BUILTIN_PCMPEQW128,
19881 IX86_BUILTIN_PCMPEQD128,
19882 IX86_BUILTIN_PCMPGTB128,
19883 IX86_BUILTIN_PCMPGTW128,
19884 IX86_BUILTIN_PCMPGTD128,
19886 IX86_BUILTIN_PMADDWD128,
19888 IX86_BUILTIN_PMAXSW128,
19889 IX86_BUILTIN_PMAXUB128,
19890 IX86_BUILTIN_PMINSW128,
19891 IX86_BUILTIN_PMINUB128,
19893 IX86_BUILTIN_PMULUDQ,
19894 IX86_BUILTIN_PMULUDQ128,
19895 IX86_BUILTIN_PMULHUW128,
19896 IX86_BUILTIN_PMULHW128,
19897 IX86_BUILTIN_PMULLW128,
19899 IX86_BUILTIN_PSADBW128,
19900 IX86_BUILTIN_PSHUFHW,
19901 IX86_BUILTIN_PSHUFLW,
19902 IX86_BUILTIN_PSHUFD,
19904 IX86_BUILTIN_PSLLDQI128,
19905 IX86_BUILTIN_PSLLWI128,
19906 IX86_BUILTIN_PSLLDI128,
19907 IX86_BUILTIN_PSLLQI128,
19908 IX86_BUILTIN_PSRAWI128,
19909 IX86_BUILTIN_PSRADI128,
19910 IX86_BUILTIN_PSRLDQI128,
19911 IX86_BUILTIN_PSRLWI128,
19912 IX86_BUILTIN_PSRLDI128,
19913 IX86_BUILTIN_PSRLQI128,
19915 IX86_BUILTIN_PSLLDQ128,
19916 IX86_BUILTIN_PSLLW128,
19917 IX86_BUILTIN_PSLLD128,
19918 IX86_BUILTIN_PSLLQ128,
19919 IX86_BUILTIN_PSRAW128,
19920 IX86_BUILTIN_PSRAD128,
19921 IX86_BUILTIN_PSRLW128,
19922 IX86_BUILTIN_PSRLD128,
19923 IX86_BUILTIN_PSRLQ128,
19925 IX86_BUILTIN_PUNPCKHBW128,
19926 IX86_BUILTIN_PUNPCKHWD128,
19927 IX86_BUILTIN_PUNPCKHDQ128,
19928 IX86_BUILTIN_PUNPCKHQDQ128,
19929 IX86_BUILTIN_PUNPCKLBW128,
19930 IX86_BUILTIN_PUNPCKLWD128,
19931 IX86_BUILTIN_PUNPCKLDQ128,
19932 IX86_BUILTIN_PUNPCKLQDQ128,
19934 IX86_BUILTIN_CLFLUSH,
19935 IX86_BUILTIN_MFENCE,
19936 IX86_BUILTIN_LFENCE,
19939 IX86_BUILTIN_ADDSUBPS,
19940 IX86_BUILTIN_HADDPS,
19941 IX86_BUILTIN_HSUBPS,
19942 IX86_BUILTIN_MOVSHDUP,
19943 IX86_BUILTIN_MOVSLDUP,
19944 IX86_BUILTIN_ADDSUBPD,
19945 IX86_BUILTIN_HADDPD,
19946 IX86_BUILTIN_HSUBPD,
19947 IX86_BUILTIN_LDDQU,
19949 IX86_BUILTIN_MONITOR,
19950 IX86_BUILTIN_MWAIT,
19953 IX86_BUILTIN_PHADDW,
19954 IX86_BUILTIN_PHADDD,
19955 IX86_BUILTIN_PHADDSW,
19956 IX86_BUILTIN_PHSUBW,
19957 IX86_BUILTIN_PHSUBD,
19958 IX86_BUILTIN_PHSUBSW,
19959 IX86_BUILTIN_PMADDUBSW,
19960 IX86_BUILTIN_PMULHRSW,
19961 IX86_BUILTIN_PSHUFB,
19962 IX86_BUILTIN_PSIGNB,
19963 IX86_BUILTIN_PSIGNW,
19964 IX86_BUILTIN_PSIGND,
19965 IX86_BUILTIN_PALIGNR,
19966 IX86_BUILTIN_PABSB,
19967 IX86_BUILTIN_PABSW,
19968 IX86_BUILTIN_PABSD,
19970 IX86_BUILTIN_PHADDW128,
19971 IX86_BUILTIN_PHADDD128,
19972 IX86_BUILTIN_PHADDSW128,
19973 IX86_BUILTIN_PHSUBW128,
19974 IX86_BUILTIN_PHSUBD128,
19975 IX86_BUILTIN_PHSUBSW128,
19976 IX86_BUILTIN_PMADDUBSW128,
19977 IX86_BUILTIN_PMULHRSW128,
19978 IX86_BUILTIN_PSHUFB128,
19979 IX86_BUILTIN_PSIGNB128,
19980 IX86_BUILTIN_PSIGNW128,
19981 IX86_BUILTIN_PSIGND128,
19982 IX86_BUILTIN_PALIGNR128,
19983 IX86_BUILTIN_PABSB128,
19984 IX86_BUILTIN_PABSW128,
19985 IX86_BUILTIN_PABSD128,
19987 /* AMDFAM10 - SSE4A New Instructions. */
19988 IX86_BUILTIN_MOVNTSD,
19989 IX86_BUILTIN_MOVNTSS,
19990 IX86_BUILTIN_EXTRQI,
19991 IX86_BUILTIN_EXTRQ,
19992 IX86_BUILTIN_INSERTQI,
19993 IX86_BUILTIN_INSERTQ,
19996 IX86_BUILTIN_BLENDPD,
19997 IX86_BUILTIN_BLENDPS,
19998 IX86_BUILTIN_BLENDVPD,
19999 IX86_BUILTIN_BLENDVPS,
20000 IX86_BUILTIN_PBLENDVB128,
20001 IX86_BUILTIN_PBLENDW128,
20006 IX86_BUILTIN_INSERTPS128,
20008 IX86_BUILTIN_MOVNTDQA,
20009 IX86_BUILTIN_MPSADBW128,
20010 IX86_BUILTIN_PACKUSDW128,
20011 IX86_BUILTIN_PCMPEQQ,
20012 IX86_BUILTIN_PHMINPOSUW128,
20014 IX86_BUILTIN_PMAXSB128,
20015 IX86_BUILTIN_PMAXSD128,
20016 IX86_BUILTIN_PMAXUD128,
20017 IX86_BUILTIN_PMAXUW128,
20019 IX86_BUILTIN_PMINSB128,
20020 IX86_BUILTIN_PMINSD128,
20021 IX86_BUILTIN_PMINUD128,
20022 IX86_BUILTIN_PMINUW128,
20024 IX86_BUILTIN_PMOVSXBW128,
20025 IX86_BUILTIN_PMOVSXBD128,
20026 IX86_BUILTIN_PMOVSXBQ128,
20027 IX86_BUILTIN_PMOVSXWD128,
20028 IX86_BUILTIN_PMOVSXWQ128,
20029 IX86_BUILTIN_PMOVSXDQ128,
20031 IX86_BUILTIN_PMOVZXBW128,
20032 IX86_BUILTIN_PMOVZXBD128,
20033 IX86_BUILTIN_PMOVZXBQ128,
20034 IX86_BUILTIN_PMOVZXWD128,
20035 IX86_BUILTIN_PMOVZXWQ128,
20036 IX86_BUILTIN_PMOVZXDQ128,
20038 IX86_BUILTIN_PMULDQ128,
20039 IX86_BUILTIN_PMULLD128,
20041 IX86_BUILTIN_ROUNDPD,
20042 IX86_BUILTIN_ROUNDPS,
20043 IX86_BUILTIN_ROUNDSD,
20044 IX86_BUILTIN_ROUNDSS,
20046 IX86_BUILTIN_PTESTZ,
20047 IX86_BUILTIN_PTESTC,
20048 IX86_BUILTIN_PTESTNZC,
20050 IX86_BUILTIN_VEC_INIT_V2SI,
20051 IX86_BUILTIN_VEC_INIT_V4HI,
20052 IX86_BUILTIN_VEC_INIT_V8QI,
20053 IX86_BUILTIN_VEC_EXT_V2DF,
20054 IX86_BUILTIN_VEC_EXT_V2DI,
20055 IX86_BUILTIN_VEC_EXT_V4SF,
20056 IX86_BUILTIN_VEC_EXT_V4SI,
20057 IX86_BUILTIN_VEC_EXT_V8HI,
20058 IX86_BUILTIN_VEC_EXT_V2SI,
20059 IX86_BUILTIN_VEC_EXT_V4HI,
20060 IX86_BUILTIN_VEC_EXT_V16QI,
20061 IX86_BUILTIN_VEC_SET_V2DI,
20062 IX86_BUILTIN_VEC_SET_V4SF,
20063 IX86_BUILTIN_VEC_SET_V4SI,
20064 IX86_BUILTIN_VEC_SET_V8HI,
20065 IX86_BUILTIN_VEC_SET_V4HI,
20066 IX86_BUILTIN_VEC_SET_V16QI,
20068 IX86_BUILTIN_VEC_PACK_SFIX,
20071 IX86_BUILTIN_CRC32QI,
20072 IX86_BUILTIN_CRC32HI,
20073 IX86_BUILTIN_CRC32SI,
20074 IX86_BUILTIN_CRC32DI,
20076 IX86_BUILTIN_PCMPESTRI128,
20077 IX86_BUILTIN_PCMPESTRM128,
20078 IX86_BUILTIN_PCMPESTRA128,
20079 IX86_BUILTIN_PCMPESTRC128,
20080 IX86_BUILTIN_PCMPESTRO128,
20081 IX86_BUILTIN_PCMPESTRS128,
20082 IX86_BUILTIN_PCMPESTRZ128,
20083 IX86_BUILTIN_PCMPISTRI128,
20084 IX86_BUILTIN_PCMPISTRM128,
20085 IX86_BUILTIN_PCMPISTRA128,
20086 IX86_BUILTIN_PCMPISTRC128,
20087 IX86_BUILTIN_PCMPISTRO128,
20088 IX86_BUILTIN_PCMPISTRS128,
20089 IX86_BUILTIN_PCMPISTRZ128,
20091 IX86_BUILTIN_PCMPGTQ,
20093 /* AES instructions */
20094 IX86_BUILTIN_AESENC128,
20095 IX86_BUILTIN_AESENCLAST128,
20096 IX86_BUILTIN_AESDEC128,
20097 IX86_BUILTIN_AESDECLAST128,
20098 IX86_BUILTIN_AESIMC128,
20099 IX86_BUILTIN_AESKEYGENASSIST128,
20101 /* PCLMUL instruction */
20102 IX86_BUILTIN_PCLMULQDQ128,
20105 IX86_BUILTIN_ADDPD256,
20106 IX86_BUILTIN_ADDPS256,
20107 IX86_BUILTIN_ADDSUBPD256,
20108 IX86_BUILTIN_ADDSUBPS256,
20109 IX86_BUILTIN_ANDPD256,
20110 IX86_BUILTIN_ANDPS256,
20111 IX86_BUILTIN_ANDNPD256,
20112 IX86_BUILTIN_ANDNPS256,
20113 IX86_BUILTIN_BLENDPD256,
20114 IX86_BUILTIN_BLENDPS256,
20115 IX86_BUILTIN_BLENDVPD256,
20116 IX86_BUILTIN_BLENDVPS256,
20117 IX86_BUILTIN_DIVPD256,
20118 IX86_BUILTIN_DIVPS256,
20119 IX86_BUILTIN_DPPS256,
20120 IX86_BUILTIN_HADDPD256,
20121 IX86_BUILTIN_HADDPS256,
20122 IX86_BUILTIN_HSUBPD256,
20123 IX86_BUILTIN_HSUBPS256,
20124 IX86_BUILTIN_MAXPD256,
20125 IX86_BUILTIN_MAXPS256,
20126 IX86_BUILTIN_MINPD256,
20127 IX86_BUILTIN_MINPS256,
20128 IX86_BUILTIN_MULPD256,
20129 IX86_BUILTIN_MULPS256,
20130 IX86_BUILTIN_ORPD256,
20131 IX86_BUILTIN_ORPS256,
20132 IX86_BUILTIN_SHUFPD256,
20133 IX86_BUILTIN_SHUFPS256,
20134 IX86_BUILTIN_SUBPD256,
20135 IX86_BUILTIN_SUBPS256,
20136 IX86_BUILTIN_XORPD256,
20137 IX86_BUILTIN_XORPS256,
20138 IX86_BUILTIN_CMPSD,
20139 IX86_BUILTIN_CMPSS,
20140 IX86_BUILTIN_CMPPD,
20141 IX86_BUILTIN_CMPPS,
20142 IX86_BUILTIN_CMPPD256,
20143 IX86_BUILTIN_CMPPS256,
20144 IX86_BUILTIN_CVTDQ2PD256,
20145 IX86_BUILTIN_CVTDQ2PS256,
20146 IX86_BUILTIN_CVTPD2PS256,
20147 IX86_BUILTIN_CVTPS2DQ256,
20148 IX86_BUILTIN_CVTPS2PD256,
20149 IX86_BUILTIN_CVTTPD2DQ256,
20150 IX86_BUILTIN_CVTPD2DQ256,
20151 IX86_BUILTIN_CVTTPS2DQ256,
20152 IX86_BUILTIN_EXTRACTF128PD256,
20153 IX86_BUILTIN_EXTRACTF128PS256,
20154 IX86_BUILTIN_EXTRACTF128SI256,
20155 IX86_BUILTIN_VZEROALL,
20156 IX86_BUILTIN_VZEROUPPER,
20157 IX86_BUILTIN_VZEROUPPER_REX64,
20158 IX86_BUILTIN_VPERMILVARPD,
20159 IX86_BUILTIN_VPERMILVARPS,
20160 IX86_BUILTIN_VPERMILVARPD256,
20161 IX86_BUILTIN_VPERMILVARPS256,
20162 IX86_BUILTIN_VPERMILPD,
20163 IX86_BUILTIN_VPERMILPS,
20164 IX86_BUILTIN_VPERMILPD256,
20165 IX86_BUILTIN_VPERMILPS256,
20166 IX86_BUILTIN_VPERM2F128PD256,
20167 IX86_BUILTIN_VPERM2F128PS256,
20168 IX86_BUILTIN_VPERM2F128SI256,
20169 IX86_BUILTIN_VBROADCASTSS,
20170 IX86_BUILTIN_VBROADCASTSD256,
20171 IX86_BUILTIN_VBROADCASTSS256,
20172 IX86_BUILTIN_VBROADCASTPD256,
20173 IX86_BUILTIN_VBROADCASTPS256,
20174 IX86_BUILTIN_VINSERTF128PD256,
20175 IX86_BUILTIN_VINSERTF128PS256,
20176 IX86_BUILTIN_VINSERTF128SI256,
20177 IX86_BUILTIN_LOADUPD256,
20178 IX86_BUILTIN_LOADUPS256,
20179 IX86_BUILTIN_STOREUPD256,
20180 IX86_BUILTIN_STOREUPS256,
20181 IX86_BUILTIN_LDDQU256,
20182 IX86_BUILTIN_MOVNTDQ256,
20183 IX86_BUILTIN_MOVNTPD256,
20184 IX86_BUILTIN_MOVNTPS256,
20185 IX86_BUILTIN_LOADDQU256,
20186 IX86_BUILTIN_STOREDQU256,
20187 IX86_BUILTIN_MASKLOADPD,
20188 IX86_BUILTIN_MASKLOADPS,
20189 IX86_BUILTIN_MASKSTOREPD,
20190 IX86_BUILTIN_MASKSTOREPS,
20191 IX86_BUILTIN_MASKLOADPD256,
20192 IX86_BUILTIN_MASKLOADPS256,
20193 IX86_BUILTIN_MASKSTOREPD256,
20194 IX86_BUILTIN_MASKSTOREPS256,
20195 IX86_BUILTIN_MOVSHDUP256,
20196 IX86_BUILTIN_MOVSLDUP256,
20197 IX86_BUILTIN_MOVDDUP256,
20199 IX86_BUILTIN_SQRTPD256,
20200 IX86_BUILTIN_SQRTPS256,
20201 IX86_BUILTIN_SQRTPS_NR256,
20202 IX86_BUILTIN_RSQRTPS256,
20203 IX86_BUILTIN_RSQRTPS_NR256,
20205 IX86_BUILTIN_RCPPS256,
20207 IX86_BUILTIN_ROUNDPD256,
20208 IX86_BUILTIN_ROUNDPS256,
20210 IX86_BUILTIN_UNPCKHPD256,
20211 IX86_BUILTIN_UNPCKLPD256,
20212 IX86_BUILTIN_UNPCKHPS256,
20213 IX86_BUILTIN_UNPCKLPS256,
20215 IX86_BUILTIN_SI256_SI,
20216 IX86_BUILTIN_PS256_PS,
20217 IX86_BUILTIN_PD256_PD,
20218 IX86_BUILTIN_SI_SI256,
20219 IX86_BUILTIN_PS_PS256,
20220 IX86_BUILTIN_PD_PD256,
20222 IX86_BUILTIN_VTESTZPD,
20223 IX86_BUILTIN_VTESTCPD,
20224 IX86_BUILTIN_VTESTNZCPD,
20225 IX86_BUILTIN_VTESTZPS,
20226 IX86_BUILTIN_VTESTCPS,
20227 IX86_BUILTIN_VTESTNZCPS,
20228 IX86_BUILTIN_VTESTZPD256,
20229 IX86_BUILTIN_VTESTCPD256,
20230 IX86_BUILTIN_VTESTNZCPD256,
20231 IX86_BUILTIN_VTESTZPS256,
20232 IX86_BUILTIN_VTESTCPS256,
20233 IX86_BUILTIN_VTESTNZCPS256,
20234 IX86_BUILTIN_PTESTZ256,
20235 IX86_BUILTIN_PTESTC256,
20236 IX86_BUILTIN_PTESTNZC256,
20238 IX86_BUILTIN_MOVMSKPD256,
20239 IX86_BUILTIN_MOVMSKPS256,
20241 /* TFmode support builtins. */
20243 IX86_BUILTIN_HUGE_VALQ,
20244 IX86_BUILTIN_FABSQ,
20245 IX86_BUILTIN_COPYSIGNQ,
20247 /* SSE5 instructions */
20248 IX86_BUILTIN_FMADDSS,
20249 IX86_BUILTIN_FMADDSD,
20250 IX86_BUILTIN_FMADDPS,
20251 IX86_BUILTIN_FMADDPD,
20252 IX86_BUILTIN_FMSUBSS,
20253 IX86_BUILTIN_FMSUBSD,
20254 IX86_BUILTIN_FMSUBPS,
20255 IX86_BUILTIN_FMSUBPD,
20256 IX86_BUILTIN_FNMADDSS,
20257 IX86_BUILTIN_FNMADDSD,
20258 IX86_BUILTIN_FNMADDPS,
20259 IX86_BUILTIN_FNMADDPD,
20260 IX86_BUILTIN_FNMSUBSS,
20261 IX86_BUILTIN_FNMSUBSD,
20262 IX86_BUILTIN_FNMSUBPS,
20263 IX86_BUILTIN_FNMSUBPD,
20264 IX86_BUILTIN_PCMOV,
20265 IX86_BUILTIN_PCMOV_V2DI,
20266 IX86_BUILTIN_PCMOV_V4SI,
20267 IX86_BUILTIN_PCMOV_V8HI,
20268 IX86_BUILTIN_PCMOV_V16QI,
20269 IX86_BUILTIN_PCMOV_V4SF,
20270 IX86_BUILTIN_PCMOV_V2DF,
20271 IX86_BUILTIN_PPERM,
20272 IX86_BUILTIN_PERMPS,
20273 IX86_BUILTIN_PERMPD,
20274 IX86_BUILTIN_PMACSSWW,
20275 IX86_BUILTIN_PMACSWW,
20276 IX86_BUILTIN_PMACSSWD,
20277 IX86_BUILTIN_PMACSWD,
20278 IX86_BUILTIN_PMACSSDD,
20279 IX86_BUILTIN_PMACSDD,
20280 IX86_BUILTIN_PMACSSDQL,
20281 IX86_BUILTIN_PMACSSDQH,
20282 IX86_BUILTIN_PMACSDQL,
20283 IX86_BUILTIN_PMACSDQH,
20284 IX86_BUILTIN_PMADCSSWD,
20285 IX86_BUILTIN_PMADCSWD,
20286 IX86_BUILTIN_PHADDBW,
20287 IX86_BUILTIN_PHADDBD,
20288 IX86_BUILTIN_PHADDBQ,
20289 IX86_BUILTIN_PHADDWD,
20290 IX86_BUILTIN_PHADDWQ,
20291 IX86_BUILTIN_PHADDDQ,
20292 IX86_BUILTIN_PHADDUBW,
20293 IX86_BUILTIN_PHADDUBD,
20294 IX86_BUILTIN_PHADDUBQ,
20295 IX86_BUILTIN_PHADDUWD,
20296 IX86_BUILTIN_PHADDUWQ,
20297 IX86_BUILTIN_PHADDUDQ,
20298 IX86_BUILTIN_PHSUBBW,
20299 IX86_BUILTIN_PHSUBWD,
20300 IX86_BUILTIN_PHSUBDQ,
20301 IX86_BUILTIN_PROTB,
20302 IX86_BUILTIN_PROTW,
20303 IX86_BUILTIN_PROTD,
20304 IX86_BUILTIN_PROTQ,
20305 IX86_BUILTIN_PROTB_IMM,
20306 IX86_BUILTIN_PROTW_IMM,
20307 IX86_BUILTIN_PROTD_IMM,
20308 IX86_BUILTIN_PROTQ_IMM,
20309 IX86_BUILTIN_PSHLB,
20310 IX86_BUILTIN_PSHLW,
20311 IX86_BUILTIN_PSHLD,
20312 IX86_BUILTIN_PSHLQ,
20313 IX86_BUILTIN_PSHAB,
20314 IX86_BUILTIN_PSHAW,
20315 IX86_BUILTIN_PSHAD,
20316 IX86_BUILTIN_PSHAQ,
20317 IX86_BUILTIN_FRCZSS,
20318 IX86_BUILTIN_FRCZSD,
20319 IX86_BUILTIN_FRCZPS,
20320 IX86_BUILTIN_FRCZPD,
20321 IX86_BUILTIN_CVTPH2PS,
20322 IX86_BUILTIN_CVTPS2PH,
20324 IX86_BUILTIN_COMEQSS,
20325 IX86_BUILTIN_COMNESS,
20326 IX86_BUILTIN_COMLTSS,
20327 IX86_BUILTIN_COMLESS,
20328 IX86_BUILTIN_COMGTSS,
20329 IX86_BUILTIN_COMGESS,
20330 IX86_BUILTIN_COMUEQSS,
20331 IX86_BUILTIN_COMUNESS,
20332 IX86_BUILTIN_COMULTSS,
20333 IX86_BUILTIN_COMULESS,
20334 IX86_BUILTIN_COMUGTSS,
20335 IX86_BUILTIN_COMUGESS,
20336 IX86_BUILTIN_COMORDSS,
20337 IX86_BUILTIN_COMUNORDSS,
20338 IX86_BUILTIN_COMFALSESS,
20339 IX86_BUILTIN_COMTRUESS,
20341 IX86_BUILTIN_COMEQSD,
20342 IX86_BUILTIN_COMNESD,
20343 IX86_BUILTIN_COMLTSD,
20344 IX86_BUILTIN_COMLESD,
20345 IX86_BUILTIN_COMGTSD,
20346 IX86_BUILTIN_COMGESD,
20347 IX86_BUILTIN_COMUEQSD,
20348 IX86_BUILTIN_COMUNESD,
20349 IX86_BUILTIN_COMULTSD,
20350 IX86_BUILTIN_COMULESD,
20351 IX86_BUILTIN_COMUGTSD,
20352 IX86_BUILTIN_COMUGESD,
20353 IX86_BUILTIN_COMORDSD,
20354 IX86_BUILTIN_COMUNORDSD,
20355 IX86_BUILTIN_COMFALSESD,
20356 IX86_BUILTIN_COMTRUESD,
20358 IX86_BUILTIN_COMEQPS,
20359 IX86_BUILTIN_COMNEPS,
20360 IX86_BUILTIN_COMLTPS,
20361 IX86_BUILTIN_COMLEPS,
20362 IX86_BUILTIN_COMGTPS,
20363 IX86_BUILTIN_COMGEPS,
20364 IX86_BUILTIN_COMUEQPS,
20365 IX86_BUILTIN_COMUNEPS,
20366 IX86_BUILTIN_COMULTPS,
20367 IX86_BUILTIN_COMULEPS,
20368 IX86_BUILTIN_COMUGTPS,
20369 IX86_BUILTIN_COMUGEPS,
20370 IX86_BUILTIN_COMORDPS,
20371 IX86_BUILTIN_COMUNORDPS,
20372 IX86_BUILTIN_COMFALSEPS,
20373 IX86_BUILTIN_COMTRUEPS,
20375 IX86_BUILTIN_COMEQPD,
20376 IX86_BUILTIN_COMNEPD,
20377 IX86_BUILTIN_COMLTPD,
20378 IX86_BUILTIN_COMLEPD,
20379 IX86_BUILTIN_COMGTPD,
20380 IX86_BUILTIN_COMGEPD,
20381 IX86_BUILTIN_COMUEQPD,
20382 IX86_BUILTIN_COMUNEPD,
20383 IX86_BUILTIN_COMULTPD,
20384 IX86_BUILTIN_COMULEPD,
20385 IX86_BUILTIN_COMUGTPD,
20386 IX86_BUILTIN_COMUGEPD,
20387 IX86_BUILTIN_COMORDPD,
20388 IX86_BUILTIN_COMUNORDPD,
20389 IX86_BUILTIN_COMFALSEPD,
20390 IX86_BUILTIN_COMTRUEPD,
20392 IX86_BUILTIN_PCOMEQUB,
20393 IX86_BUILTIN_PCOMNEUB,
20394 IX86_BUILTIN_PCOMLTUB,
20395 IX86_BUILTIN_PCOMLEUB,
20396 IX86_BUILTIN_PCOMGTUB,
20397 IX86_BUILTIN_PCOMGEUB,
20398 IX86_BUILTIN_PCOMFALSEUB,
20399 IX86_BUILTIN_PCOMTRUEUB,
20400 IX86_BUILTIN_PCOMEQUW,
20401 IX86_BUILTIN_PCOMNEUW,
20402 IX86_BUILTIN_PCOMLTUW,
20403 IX86_BUILTIN_PCOMLEUW,
20404 IX86_BUILTIN_PCOMGTUW,
20405 IX86_BUILTIN_PCOMGEUW,
20406 IX86_BUILTIN_PCOMFALSEUW,
20407 IX86_BUILTIN_PCOMTRUEUW,
20408 IX86_BUILTIN_PCOMEQUD,
20409 IX86_BUILTIN_PCOMNEUD,
20410 IX86_BUILTIN_PCOMLTUD,
20411 IX86_BUILTIN_PCOMLEUD,
20412 IX86_BUILTIN_PCOMGTUD,
20413 IX86_BUILTIN_PCOMGEUD,
20414 IX86_BUILTIN_PCOMFALSEUD,
20415 IX86_BUILTIN_PCOMTRUEUD,
20416 IX86_BUILTIN_PCOMEQUQ,
20417 IX86_BUILTIN_PCOMNEUQ,
20418 IX86_BUILTIN_PCOMLTUQ,
20419 IX86_BUILTIN_PCOMLEUQ,
20420 IX86_BUILTIN_PCOMGTUQ,
20421 IX86_BUILTIN_PCOMGEUQ,
20422 IX86_BUILTIN_PCOMFALSEUQ,
20423 IX86_BUILTIN_PCOMTRUEUQ,
20425 IX86_BUILTIN_PCOMEQB,
20426 IX86_BUILTIN_PCOMNEB,
20427 IX86_BUILTIN_PCOMLTB,
20428 IX86_BUILTIN_PCOMLEB,
20429 IX86_BUILTIN_PCOMGTB,
20430 IX86_BUILTIN_PCOMGEB,
20431 IX86_BUILTIN_PCOMFALSEB,
20432 IX86_BUILTIN_PCOMTRUEB,
20433 IX86_BUILTIN_PCOMEQW,
20434 IX86_BUILTIN_PCOMNEW,
20435 IX86_BUILTIN_PCOMLTW,
20436 IX86_BUILTIN_PCOMLEW,
20437 IX86_BUILTIN_PCOMGTW,
20438 IX86_BUILTIN_PCOMGEW,
20439 IX86_BUILTIN_PCOMFALSEW,
20440 IX86_BUILTIN_PCOMTRUEW,
20441 IX86_BUILTIN_PCOMEQD,
20442 IX86_BUILTIN_PCOMNED,
20443 IX86_BUILTIN_PCOMLTD,
20444 IX86_BUILTIN_PCOMLED,
20445 IX86_BUILTIN_PCOMGTD,
20446 IX86_BUILTIN_PCOMGED,
20447 IX86_BUILTIN_PCOMFALSED,
20448 IX86_BUILTIN_PCOMTRUED,
20449 IX86_BUILTIN_PCOMEQQ,
20450 IX86_BUILTIN_PCOMNEQ,
20451 IX86_BUILTIN_PCOMLTQ,
20452 IX86_BUILTIN_PCOMLEQ,
20453 IX86_BUILTIN_PCOMGTQ,
20454 IX86_BUILTIN_PCOMGEQ,
20455 IX86_BUILTIN_PCOMFALSEQ,
20456 IX86_BUILTIN_PCOMTRUEQ,
20461 /* Table for the ix86 builtin decls. */
20462 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20464 /* Table of all of the builtin functions that are possible with different ISA's
20465 but are waiting to be built until a function is declared to use that
20467 struct builtin_isa GTY(())
20469 tree type; /* builtin type to use in the declaration */
20470 const char *name; /* function name */
20471 int isa; /* isa_flags this builtin is defined for */
20472 bool const_p; /* true if the declaration is constant */
20475 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20478 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20479 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20480 * function decl in the ix86_builtins array. Returns the function decl or
20481 * NULL_TREE, if the builtin was not added.
20483 * If the front end has a special hook for builtin functions, delay adding
20484 * builtin functions that aren't in the current ISA until the ISA is changed
20485 * with function specific optimization. Doing so, can save about 300K for the
20486 * default compiler. When the builtin is expanded, check at that time whether
20489 * If the front end doesn't have a special hook, record all builtins, even if
20490 * it isn't an instruction set in the current ISA in case the user uses
20491 * function specific options for a different ISA, so that we don't get scope
20492 * errors if a builtin is added in the middle of a function scope. */
20495 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20497 tree decl = NULL_TREE;
20499 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20501 ix86_builtins_isa[(int) code].isa = mask;
20503 if ((mask & ix86_isa_flags) != 0
20504 || (lang_hooks.builtin_function
20505 == lang_hooks.builtin_function_ext_scope))
20508 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20510 ix86_builtins[(int) code] = decl;
20511 ix86_builtins_isa[(int) code].type = NULL_TREE;
20515 ix86_builtins[(int) code] = NULL_TREE;
20516 ix86_builtins_isa[(int) code].const_p = false;
20517 ix86_builtins_isa[(int) code].type = type;
20518 ix86_builtins_isa[(int) code].name = name;
20525 /* Like def_builtin, but also marks the function decl "const". */
20528 def_builtin_const (int mask, const char *name, tree type,
20529 enum ix86_builtins code)
20531 tree decl = def_builtin (mask, name, type, code);
20533 TREE_READONLY (decl) = 1;
20535 ix86_builtins_isa[(int) code].const_p = true;
20540 /* Add any new builtin functions for a given ISA that may not have been
20541 declared. This saves a bit of space compared to adding all of the
20542 declarations to the tree, even if we didn't use them. */
20545 ix86_add_new_builtins (int isa)
20550 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20552 if ((ix86_builtins_isa[i].isa & isa) != 0
20553 && ix86_builtins_isa[i].type != NULL_TREE)
20555 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20556 ix86_builtins_isa[i].type,
20557 i, BUILT_IN_MD, NULL,
20560 ix86_builtins[i] = decl;
20561 ix86_builtins_isa[i].type = NULL_TREE;
20562 if (ix86_builtins_isa[i].const_p)
20563 TREE_READONLY (decl) = 1;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
20574 struct builtin_description
20576 const unsigned int mask;
20577 const enum insn_code icode;
20578 const char *const name;
20579 const enum ix86_builtins code;
20580 const enum rtx_code comparison;
20584 static const struct builtin_description bdesc_comi[] =
20586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20612 static const struct builtin_description bdesc_pcmpestr[] =
20615 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20616 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20617 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20618 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20619 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20620 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20621 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20624 static const struct builtin_description bdesc_pcmpistr[] =
20627 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20628 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20629 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20630 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20631 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20632 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20633 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Special builtin types: function signatures for builtins taking pointer
   arguments (loads/stores) or no arguments.  Naming: RET_FTYPE_ARGS, with
   PC* = pointer-to-const.  NOTE(review): five enumerators here (VOID_FTYPE_VOID,
   V8SF_FTYPE_PCV4SF, V4DF_FTYPE_PCV2DF, V2DI_FTYPE_PV2DI, VOID_FTYPE_PDI_DI)
   were restored from their uses in bdesc_special_args below -- confirm against
   upstream.  */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCV4SF,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCV2DF,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  V2DI_FTYPE_PV2DI,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* NOTE(review): the enum body below appears truncated by the extraction --
   the fused original line numbers jump (20674 -> 20677, 20685 -> 20717,
   20717 -> 20726, 20726 -> 20737), so the opening brace, many enumerators,
   and the closing brace are missing here.  Restore from the upstream file
   before compiling -- TODO confirm.  Naming convention (from the visible
   members): RET_FTYPE_ARGS; _COUNT = shift-count variant, _SWAP = operands
   swapped, _PTEST = compare feeding ptest-style flag output.  */
20673 /* Builtin types */
20674 enum ix86_builtin_type
20677 FLOAT128_FTYPE_FLOAT128,
20679 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20680 INT_FTYPE_V8SF_V8SF_PTEST,
20681 INT_FTYPE_V4DI_V4DI_PTEST,
20682 INT_FTYPE_V4DF_V4DF_PTEST,
20683 INT_FTYPE_V4SF_V4SF_PTEST,
20684 INT_FTYPE_V2DI_V2DI_PTEST,
20685 INT_FTYPE_V2DF_V2DF_PTEST,
20717 V4SF_FTYPE_V4SF_VEC_MERGE,
20726 V2DF_FTYPE_V2DF_VEC_MERGE,
20737 V16QI_FTYPE_V16QI_V16QI,
20738 V16QI_FTYPE_V8HI_V8HI,
20739 V8QI_FTYPE_V8QI_V8QI,
20740 V8QI_FTYPE_V4HI_V4HI,
20741 V8HI_FTYPE_V8HI_V8HI,
20742 V8HI_FTYPE_V8HI_V8HI_COUNT,
20743 V8HI_FTYPE_V16QI_V16QI,
20744 V8HI_FTYPE_V4SI_V4SI,
20745 V8HI_FTYPE_V8HI_SI_COUNT,
20746 V8SF_FTYPE_V8SF_V8SF,
20747 V8SF_FTYPE_V8SF_V8SI,
20748 V4SI_FTYPE_V4SI_V4SI,
20749 V4SI_FTYPE_V4SI_V4SI_COUNT,
20750 V4SI_FTYPE_V8HI_V8HI,
20751 V4SI_FTYPE_V4SF_V4SF,
20752 V4SI_FTYPE_V2DF_V2DF,
20753 V4SI_FTYPE_V4SI_SI_COUNT,
20754 V4HI_FTYPE_V4HI_V4HI,
20755 V4HI_FTYPE_V4HI_V4HI_COUNT,
20756 V4HI_FTYPE_V8QI_V8QI,
20757 V4HI_FTYPE_V2SI_V2SI,
20758 V4HI_FTYPE_V4HI_SI_COUNT,
20759 V4DF_FTYPE_V4DF_V4DF,
20760 V4DF_FTYPE_V4DF_V4DI,
20761 V4SF_FTYPE_V4SF_V4SF,
20762 V4SF_FTYPE_V4SF_V4SF_SWAP,
20763 V4SF_FTYPE_V4SF_V4SI,
20764 V4SF_FTYPE_V4SF_V2SI,
20765 V4SF_FTYPE_V4SF_V2DF,
20766 V4SF_FTYPE_V4SF_DI,
20767 V4SF_FTYPE_V4SF_SI,
20768 V2DI_FTYPE_V2DI_V2DI,
20769 V2DI_FTYPE_V2DI_V2DI_COUNT,
20770 V2DI_FTYPE_V16QI_V16QI,
20771 V2DI_FTYPE_V4SI_V4SI,
20772 V2DI_FTYPE_V2DI_V16QI,
20773 V2DI_FTYPE_V2DF_V2DF,
20774 V2DI_FTYPE_V2DI_SI_COUNT,
20775 V2SI_FTYPE_V2SI_V2SI,
20776 V2SI_FTYPE_V2SI_V2SI_COUNT,
20777 V2SI_FTYPE_V4HI_V4HI,
20778 V2SI_FTYPE_V2SF_V2SF,
20779 V2SI_FTYPE_V2SI_SI_COUNT,
20780 V2DF_FTYPE_V2DF_V2DF,
20781 V2DF_FTYPE_V2DF_V2DF_SWAP,
20782 V2DF_FTYPE_V2DF_V4SF,
20783 V2DF_FTYPE_V2DF_V2DI,
20784 V2DF_FTYPE_V2DF_DI,
20785 V2DF_FTYPE_V2DF_SI,
20786 V2SF_FTYPE_V2SF_V2SF,
20787 V1DI_FTYPE_V1DI_V1DI,
20788 V1DI_FTYPE_V1DI_V1DI_COUNT,
20789 V1DI_FTYPE_V8QI_V8QI,
20790 V1DI_FTYPE_V2SI_V2SI,
20791 V1DI_FTYPE_V1DI_SI_COUNT,
20792 UINT64_FTYPE_UINT64_UINT64,
20793 UINT_FTYPE_UINT_UINT,
20794 UINT_FTYPE_UINT_USHORT,
20795 UINT_FTYPE_UINT_UCHAR,
20796 V8HI_FTYPE_V8HI_INT,
20797 V4SI_FTYPE_V4SI_INT,
20798 V4HI_FTYPE_V4HI_INT,
20799 V8SF_FTYPE_V8SF_INT,
20800 V4SI_FTYPE_V8SI_INT,
20801 V4SF_FTYPE_V8SF_INT,
20802 V2DF_FTYPE_V4DF_INT,
20803 V4DF_FTYPE_V4DF_INT,
20804 V4SF_FTYPE_V4SF_INT,
20805 V2DI_FTYPE_V2DI_INT,
20806 V2DI2TI_FTYPE_V2DI_INT,
20807 V2DF_FTYPE_V2DF_INT,
20808 V16QI_FTYPE_V16QI_V16QI_V16QI,
20809 V8SF_FTYPE_V8SF_V8SF_V8SF,
20810 V4DF_FTYPE_V4DF_V4DF_V4DF,
20811 V4SF_FTYPE_V4SF_V4SF_V4SF,
20812 V2DF_FTYPE_V2DF_V2DF_V2DF,
20813 V16QI_FTYPE_V16QI_V16QI_INT,
20814 V8SI_FTYPE_V8SI_V8SI_INT,
20815 V8SI_FTYPE_V8SI_V4SI_INT,
20816 V8HI_FTYPE_V8HI_V8HI_INT,
20817 V8SF_FTYPE_V8SF_V8SF_INT,
20818 V8SF_FTYPE_V8SF_V4SF_INT,
20819 V4SI_FTYPE_V4SI_V4SI_INT,
20820 V4DF_FTYPE_V4DF_V4DF_INT,
20821 V4DF_FTYPE_V4DF_V2DF_INT,
20822 V4SF_FTYPE_V4SF_V4SF_INT,
20823 V2DI_FTYPE_V2DI_V2DI_INT,
20824 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20825 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20826 V2DF_FTYPE_V2DF_V2DF_INT,
20827 V2DI_FTYPE_V2DI_UINT_UINT,
20828 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20831 /* Special builtins with variable number of arguments. */
20832 static const struct builtin_description bdesc_special_args[] =
20835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20838 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20850 /* SSE or 3DNow!A */
20851 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20852 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
20855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20869 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20872 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
20875 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20876 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20881 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
20898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20911 /* Builtins with variable number of arguments. */
20912 static const struct builtin_description bdesc_args[] =
20915 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20916 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20917 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20918 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20919 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20920 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20922 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20923 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20924 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20925 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20926 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20927 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20928 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20929 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20931 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20932 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20934 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20935 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20936 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20937 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20939 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20940 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20941 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20942 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20943 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20944 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20946 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20947 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20948 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20949 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20950 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20951 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20953 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20954 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20955 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20957 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20960 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20961 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20962 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20963 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20964 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20966 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20967 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20968 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20969 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20971 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20973 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20974 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20975 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20976 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20979 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20980 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20981 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20982 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20984 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20985 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20986 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20987 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20988 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20989 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20990 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20991 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20992 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20993 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20994 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20995 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20996 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20997 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20998 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21001 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21002 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21003 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21004 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21005 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21006 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21011 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21013 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21017 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21020 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21024 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21025 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21026 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21048 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21049 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21050 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21056 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21057 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21061 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21063 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21064 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21074 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21076 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21078 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21079 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21080 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21082 /* SSE MMX or 3Dnow!A */
21083 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21084 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21085 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21087 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21088 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21089 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21090 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21092 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21093 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21095 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21116 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21117 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21123 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21124 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21125 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21126 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21154 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21158 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21160 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21161 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21167 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21169 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21170 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21171 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21172 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21173 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21174 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21175 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21176 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21182 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21183 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21187 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21188 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21190 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21192 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21193 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21205 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21206 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21207 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21223 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21232 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21237 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21238 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21239 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21240 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21241 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21242 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21245 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21246 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21247 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21248 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21249 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21250 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21252 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21253 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21254 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21255 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21263 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21264 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21269 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21270 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21273 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21274 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21276 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21277 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21278 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21279 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21280 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21281 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21284 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21285 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21286 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21287 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21288 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21289 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21291 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21292 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21293 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21294 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21295 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21296 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21297 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21298 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21299 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21300 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21301 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21302 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21303 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21304 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21305 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21306 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21307 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21308 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21309 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21310 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21311 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21312 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21313 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21314 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21317 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21318 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21321 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21322 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21323 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21324 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21325 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21326 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21327 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21328 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21329 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21330 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21332 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21333 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21334 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21335 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21336 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21337 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21338 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21339 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21340 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21341 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21342 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21343 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21344 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21346 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21347 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21348 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21349 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21350 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21352 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21353 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21354 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21355 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21356 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21357 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21359 /* SSE4.1 and SSE5 */
21360 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21361 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21362 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21363 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21365 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21366 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21367 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21370 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21371 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21372 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21373 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21374 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21377 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21378 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21379 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21380 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21383 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21384 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21386 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21387 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21388 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21389 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21392 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21395 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21396 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21397 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21398 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21399 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21400 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21401 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21402 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21403 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21404 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21405 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21406 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21407 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21408 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21409 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21410 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21411 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21412 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21413 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21414 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21415 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21416 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21417 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21418 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21419 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21420 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21466 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21468 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21470 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21477 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21478 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21479 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21480 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21482 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21483 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21484 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21485 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21486 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21487 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21489 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21490 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21491 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21492 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21493 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21494 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21495 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21496 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21497 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21498 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21499 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21500 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21501 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21502 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21503 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21506 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21510 enum multi_arg_type {
21520 MULTI_ARG_3_PERMPS,
21521 MULTI_ARG_3_PERMPD,
21528 MULTI_ARG_2_DI_IMM,
21529 MULTI_ARG_2_SI_IMM,
21530 MULTI_ARG_2_HI_IMM,
21531 MULTI_ARG_2_QI_IMM,
21532 MULTI_ARG_2_SF_CMP,
21533 MULTI_ARG_2_DF_CMP,
21534 MULTI_ARG_2_DI_CMP,
21535 MULTI_ARG_2_SI_CMP,
21536 MULTI_ARG_2_HI_CMP,
21537 MULTI_ARG_2_QI_CMP,
21560 static const struct builtin_description bdesc_multi_arg[] =
21562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21563 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21609 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21613 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21625 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21626 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21627 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21628 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21629 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21630 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21631 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21632 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21633 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21634 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21635 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21636 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21638 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21639 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21640 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21641 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21642 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21643 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21644 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21645 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21646 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21647 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21648 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21649 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21650 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21651 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21652 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21653 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21655 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21656 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21657 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21658 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21659 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21660 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21661 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21662 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21663 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21664 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21665 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21666 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21667 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21668 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21669 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21670 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21672 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21673 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21674 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21675 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21676 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21677 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21678 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21679 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21680 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21681 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21682 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21683 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21684 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21685 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21686 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21687 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21689 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21690 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21691 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21692 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21693 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21694 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21695 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21696 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21697 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21698 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21699 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21700 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21701 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21702 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21703 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21704 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21706 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21707 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21708 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21709 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21710 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21711 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21712 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21714 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21715 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21716 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21717 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21718 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21719 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21720 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21722 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21723 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21724 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21725 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21726 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21727 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21728 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21730 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21731 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21732 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21733 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21734 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21735 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21736 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21738 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21739 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21740 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21741 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21742 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21743 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21744 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21746 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21747 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21748 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21749 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21750 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21751 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21752 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21754 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21755 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21756 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21757 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21758 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21759 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21760 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21762 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21763 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21764 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21765 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21766 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21767 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21768 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21770 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21771 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21772 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21774 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
21803 ix86_init_mmx_sse_builtins (void)
21805 const struct builtin_description * d;
21808 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21809 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21810 tree V1DI_type_node
21811 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21812 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21813 tree V2DI_type_node
21814 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21815 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21816 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21817 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21818 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21819 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21820 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21822 tree pchar_type_node = build_pointer_type (char_type_node);
21823 tree pcchar_type_node
21824 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21825 tree pfloat_type_node = build_pointer_type (float_type_node);
21826 tree pcfloat_type_node
21827 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21828 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21829 tree pcv2sf_type_node
21830 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21831 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21832 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21835 tree int_ftype_v4sf_v4sf
21836 = build_function_type_list (integer_type_node,
21837 V4SF_type_node, V4SF_type_node, NULL_TREE);
21838 tree v4si_ftype_v4sf_v4sf
21839 = build_function_type_list (V4SI_type_node,
21840 V4SF_type_node, V4SF_type_node, NULL_TREE);
21841 /* MMX/SSE/integer conversions. */
21842 tree int_ftype_v4sf
21843 = build_function_type_list (integer_type_node,
21844 V4SF_type_node, NULL_TREE);
21845 tree int64_ftype_v4sf
21846 = build_function_type_list (long_long_integer_type_node,
21847 V4SF_type_node, NULL_TREE);
21848 tree int_ftype_v8qi
21849 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21850 tree v4sf_ftype_v4sf_int
21851 = build_function_type_list (V4SF_type_node,
21852 V4SF_type_node, integer_type_node, NULL_TREE);
21853 tree v4sf_ftype_v4sf_int64
21854 = build_function_type_list (V4SF_type_node,
21855 V4SF_type_node, long_long_integer_type_node,
21857 tree v4sf_ftype_v4sf_v2si
21858 = build_function_type_list (V4SF_type_node,
21859 V4SF_type_node, V2SI_type_node, NULL_TREE);
21861 /* Miscellaneous. */
21862 tree v8qi_ftype_v4hi_v4hi
21863 = build_function_type_list (V8QI_type_node,
21864 V4HI_type_node, V4HI_type_node, NULL_TREE);
21865 tree v4hi_ftype_v2si_v2si
21866 = build_function_type_list (V4HI_type_node,
21867 V2SI_type_node, V2SI_type_node, NULL_TREE);
21868 tree v4sf_ftype_v4sf_v4sf_int
21869 = build_function_type_list (V4SF_type_node,
21870 V4SF_type_node, V4SF_type_node,
21871 integer_type_node, NULL_TREE);
21872 tree v2si_ftype_v4hi_v4hi
21873 = build_function_type_list (V2SI_type_node,
21874 V4HI_type_node, V4HI_type_node, NULL_TREE);
21875 tree v4hi_ftype_v4hi_int
21876 = build_function_type_list (V4HI_type_node,
21877 V4HI_type_node, integer_type_node, NULL_TREE);
21878 tree v2si_ftype_v2si_int
21879 = build_function_type_list (V2SI_type_node,
21880 V2SI_type_node, integer_type_node, NULL_TREE);
21881 tree v1di_ftype_v1di_int
21882 = build_function_type_list (V1DI_type_node,
21883 V1DI_type_node, integer_type_node, NULL_TREE);
21885 tree void_ftype_void
21886 = build_function_type (void_type_node, void_list_node);
21887 tree void_ftype_unsigned
21888 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21889 tree void_ftype_unsigned_unsigned
21890 = build_function_type_list (void_type_node, unsigned_type_node,
21891 unsigned_type_node, NULL_TREE);
21892 tree void_ftype_pcvoid_unsigned_unsigned
21893 = build_function_type_list (void_type_node, const_ptr_type_node,
21894 unsigned_type_node, unsigned_type_node,
21896 tree unsigned_ftype_void
21897 = build_function_type (unsigned_type_node, void_list_node);
21898 tree v2si_ftype_v4sf
21899 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21900 /* Loads/stores. */
21901 tree void_ftype_v8qi_v8qi_pchar
21902 = build_function_type_list (void_type_node,
21903 V8QI_type_node, V8QI_type_node,
21904 pchar_type_node, NULL_TREE);
21905 tree v4sf_ftype_pcfloat
21906 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21907 tree v4sf_ftype_v4sf_pcv2sf
21908 = build_function_type_list (V4SF_type_node,
21909 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21910 tree void_ftype_pv2sf_v4sf
21911 = build_function_type_list (void_type_node,
21912 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21913 tree void_ftype_pfloat_v4sf
21914 = build_function_type_list (void_type_node,
21915 pfloat_type_node, V4SF_type_node, NULL_TREE);
21916 tree void_ftype_pdi_di
21917 = build_function_type_list (void_type_node,
21918 pdi_type_node, long_long_unsigned_type_node,
21920 tree void_ftype_pv2di_v2di
21921 = build_function_type_list (void_type_node,
21922 pv2di_type_node, V2DI_type_node, NULL_TREE);
21923 /* Normal vector unops. */
21924 tree v4sf_ftype_v4sf
21925 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21926 tree v16qi_ftype_v16qi
21927 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21928 tree v8hi_ftype_v8hi
21929 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21930 tree v4si_ftype_v4si
21931 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21932 tree v8qi_ftype_v8qi
21933 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21934 tree v4hi_ftype_v4hi
21935 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21937 /* Normal vector binops. */
21938 tree v4sf_ftype_v4sf_v4sf
21939 = build_function_type_list (V4SF_type_node,
21940 V4SF_type_node, V4SF_type_node, NULL_TREE);
21941 tree v8qi_ftype_v8qi_v8qi
21942 = build_function_type_list (V8QI_type_node,
21943 V8QI_type_node, V8QI_type_node, NULL_TREE);
21944 tree v4hi_ftype_v4hi_v4hi
21945 = build_function_type_list (V4HI_type_node,
21946 V4HI_type_node, V4HI_type_node, NULL_TREE);
21947 tree v2si_ftype_v2si_v2si
21948 = build_function_type_list (V2SI_type_node,
21949 V2SI_type_node, V2SI_type_node, NULL_TREE);
21950 tree v1di_ftype_v1di_v1di
21951 = build_function_type_list (V1DI_type_node,
21952 V1DI_type_node, V1DI_type_node, NULL_TREE);
21953 tree v1di_ftype_v1di_v1di_int
21954 = build_function_type_list (V1DI_type_node,
21955 V1DI_type_node, V1DI_type_node,
21956 integer_type_node, NULL_TREE);
21957 tree v2si_ftype_v2sf
21958 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21959 tree v2sf_ftype_v2si
21960 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21961 tree v2si_ftype_v2si
21962 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21963 tree v2sf_ftype_v2sf
21964 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21965 tree v2sf_ftype_v2sf_v2sf
21966 = build_function_type_list (V2SF_type_node,
21967 V2SF_type_node, V2SF_type_node, NULL_TREE);
21968 tree v2si_ftype_v2sf_v2sf
21969 = build_function_type_list (V2SI_type_node,
21970 V2SF_type_node, V2SF_type_node, NULL_TREE);
21971 tree pint_type_node = build_pointer_type (integer_type_node);
21972 tree pdouble_type_node = build_pointer_type (double_type_node);
21973 tree pcdouble_type_node = build_pointer_type (
21974 build_type_variant (double_type_node, 1, 0));
21975 tree int_ftype_v2df_v2df
21976 = build_function_type_list (integer_type_node,
21977 V2DF_type_node, V2DF_type_node, NULL_TREE);
21979 tree void_ftype_pcvoid
21980 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21981 tree v4sf_ftype_v4si
21982 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21983 tree v4si_ftype_v4sf
21984 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21985 tree v2df_ftype_v4si
21986 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21987 tree v4si_ftype_v2df
21988 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21989 tree v4si_ftype_v2df_v2df
21990 = build_function_type_list (V4SI_type_node,
21991 V2DF_type_node, V2DF_type_node, NULL_TREE);
21992 tree v2si_ftype_v2df
21993 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21994 tree v4sf_ftype_v2df
21995 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21996 tree v2df_ftype_v2si
21997 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21998 tree v2df_ftype_v4sf
21999 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22000 tree int_ftype_v2df
22001 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22002 tree int64_ftype_v2df
22003 = build_function_type_list (long_long_integer_type_node,
22004 V2DF_type_node, NULL_TREE);
22005 tree v2df_ftype_v2df_int
22006 = build_function_type_list (V2DF_type_node,
22007 V2DF_type_node, integer_type_node, NULL_TREE);
22008 tree v2df_ftype_v2df_int64
22009 = build_function_type_list (V2DF_type_node,
22010 V2DF_type_node, long_long_integer_type_node,
22012 tree v4sf_ftype_v4sf_v2df
22013 = build_function_type_list (V4SF_type_node,
22014 V4SF_type_node, V2DF_type_node, NULL_TREE);
22015 tree v2df_ftype_v2df_v4sf
22016 = build_function_type_list (V2DF_type_node,
22017 V2DF_type_node, V4SF_type_node, NULL_TREE);
22018 tree v2df_ftype_v2df_v2df_int
22019 = build_function_type_list (V2DF_type_node,
22020 V2DF_type_node, V2DF_type_node,
22023 tree v2df_ftype_v2df_pcdouble
22024 = build_function_type_list (V2DF_type_node,
22025 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22026 tree void_ftype_pdouble_v2df
22027 = build_function_type_list (void_type_node,
22028 pdouble_type_node, V2DF_type_node, NULL_TREE);
22029 tree void_ftype_pint_int
22030 = build_function_type_list (void_type_node,
22031 pint_type_node, integer_type_node, NULL_TREE);
22032 tree void_ftype_v16qi_v16qi_pchar
22033 = build_function_type_list (void_type_node,
22034 V16QI_type_node, V16QI_type_node,
22035 pchar_type_node, NULL_TREE);
22036 tree v2df_ftype_pcdouble
22037 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22038 tree v2df_ftype_v2df_v2df
22039 = build_function_type_list (V2DF_type_node,
22040 V2DF_type_node, V2DF_type_node, NULL_TREE);
22041 tree v16qi_ftype_v16qi_v16qi
22042 = build_function_type_list (V16QI_type_node,
22043 V16QI_type_node, V16QI_type_node, NULL_TREE);
22044 tree v8hi_ftype_v8hi_v8hi
22045 = build_function_type_list (V8HI_type_node,
22046 V8HI_type_node, V8HI_type_node, NULL_TREE);
22047 tree v4si_ftype_v4si_v4si
22048 = build_function_type_list (V4SI_type_node,
22049 V4SI_type_node, V4SI_type_node, NULL_TREE);
22050 tree v2di_ftype_v2di_v2di
22051 = build_function_type_list (V2DI_type_node,
22052 V2DI_type_node, V2DI_type_node, NULL_TREE);
22053 tree v2di_ftype_v2df_v2df
22054 = build_function_type_list (V2DI_type_node,
22055 V2DF_type_node, V2DF_type_node, NULL_TREE);
22056 tree v2df_ftype_v2df
22057 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22058 tree v2di_ftype_v2di_int
22059 = build_function_type_list (V2DI_type_node,
22060 V2DI_type_node, integer_type_node, NULL_TREE);
22061 tree v2di_ftype_v2di_v2di_int
22062 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22063 V2DI_type_node, integer_type_node, NULL_TREE);
22064 tree v4si_ftype_v4si_int
22065 = build_function_type_list (V4SI_type_node,
22066 V4SI_type_node, integer_type_node, NULL_TREE);
22067 tree v8hi_ftype_v8hi_int
22068 = build_function_type_list (V8HI_type_node,
22069 V8HI_type_node, integer_type_node, NULL_TREE);
22070 tree v4si_ftype_v8hi_v8hi
22071 = build_function_type_list (V4SI_type_node,
22072 V8HI_type_node, V8HI_type_node, NULL_TREE);
22073 tree v1di_ftype_v8qi_v8qi
22074 = build_function_type_list (V1DI_type_node,
22075 V8QI_type_node, V8QI_type_node, NULL_TREE);
22076 tree v1di_ftype_v2si_v2si
22077 = build_function_type_list (V1DI_type_node,
22078 V2SI_type_node, V2SI_type_node, NULL_TREE);
22079 tree v2di_ftype_v16qi_v16qi
22080 = build_function_type_list (V2DI_type_node,
22081 V16QI_type_node, V16QI_type_node, NULL_TREE);
22082 tree v2di_ftype_v4si_v4si
22083 = build_function_type_list (V2DI_type_node,
22084 V4SI_type_node, V4SI_type_node, NULL_TREE);
22085 tree int_ftype_v16qi
22086 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22087 tree v16qi_ftype_pcchar
22088 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22089 tree void_ftype_pchar_v16qi
22090 = build_function_type_list (void_type_node,
22091 pchar_type_node, V16QI_type_node, NULL_TREE);
22093 tree v2di_ftype_v2di_unsigned_unsigned
22094 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22095 unsigned_type_node, unsigned_type_node,
22097 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22098 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22099 unsigned_type_node, unsigned_type_node,
22101 tree v2di_ftype_v2di_v16qi
22102 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22104 tree v2df_ftype_v2df_v2df_v2df
22105 = build_function_type_list (V2DF_type_node,
22106 V2DF_type_node, V2DF_type_node,
22107 V2DF_type_node, NULL_TREE);
22108 tree v4sf_ftype_v4sf_v4sf_v4sf
22109 = build_function_type_list (V4SF_type_node,
22110 V4SF_type_node, V4SF_type_node,
22111 V4SF_type_node, NULL_TREE);
22112 tree v8hi_ftype_v16qi
22113 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22115 tree v4si_ftype_v16qi
22116 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22118 tree v2di_ftype_v16qi
22119 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22121 tree v4si_ftype_v8hi
22122 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22124 tree v2di_ftype_v8hi
22125 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22127 tree v2di_ftype_v4si
22128 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22130 tree v2di_ftype_pv2di
22131 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22133 tree v16qi_ftype_v16qi_v16qi_int
22134 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22135 V16QI_type_node, integer_type_node,
22137 tree v16qi_ftype_v16qi_v16qi_v16qi
22138 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22139 V16QI_type_node, V16QI_type_node,
22141 tree v8hi_ftype_v8hi_v8hi_int
22142 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22143 V8HI_type_node, integer_type_node,
22145 tree v4si_ftype_v4si_v4si_int
22146 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22147 V4SI_type_node, integer_type_node,
22149 tree int_ftype_v2di_v2di
22150 = build_function_type_list (integer_type_node,
22151 V2DI_type_node, V2DI_type_node,
22153 tree int_ftype_v16qi_int_v16qi_int_int
22154 = build_function_type_list (integer_type_node,
22161 tree v16qi_ftype_v16qi_int_v16qi_int_int
22162 = build_function_type_list (V16QI_type_node,
22169 tree int_ftype_v16qi_v16qi_int
22170 = build_function_type_list (integer_type_node,
22176 /* SSE5 instructions */
22177 tree v2di_ftype_v2di_v2di_v2di
22178 = build_function_type_list (V2DI_type_node,
22184 tree v4si_ftype_v4si_v4si_v4si
22185 = build_function_type_list (V4SI_type_node,
22191 tree v4si_ftype_v4si_v4si_v2di
22192 = build_function_type_list (V4SI_type_node,
22198 tree v8hi_ftype_v8hi_v8hi_v8hi
22199 = build_function_type_list (V8HI_type_node,
22205 tree v8hi_ftype_v8hi_v8hi_v4si
22206 = build_function_type_list (V8HI_type_node,
22212 tree v2df_ftype_v2df_v2df_v16qi
22213 = build_function_type_list (V2DF_type_node,
22219 tree v4sf_ftype_v4sf_v4sf_v16qi
22220 = build_function_type_list (V4SF_type_node,
22226 tree v2di_ftype_v2di_si
22227 = build_function_type_list (V2DI_type_node,
22232 tree v4si_ftype_v4si_si
22233 = build_function_type_list (V4SI_type_node,
22238 tree v8hi_ftype_v8hi_si
22239 = build_function_type_list (V8HI_type_node,
22244 tree v16qi_ftype_v16qi_si
22245 = build_function_type_list (V16QI_type_node,
22249 tree v4sf_ftype_v4hi
22250 = build_function_type_list (V4SF_type_node,
22254 tree v4hi_ftype_v4sf
22255 = build_function_type_list (V4HI_type_node,
22259 tree v2di_ftype_v2di
22260 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22262 tree v16qi_ftype_v8hi_v8hi
22263 = build_function_type_list (V16QI_type_node,
22264 V8HI_type_node, V8HI_type_node,
22266 tree v8hi_ftype_v4si_v4si
22267 = build_function_type_list (V8HI_type_node,
22268 V4SI_type_node, V4SI_type_node,
22270 tree v8hi_ftype_v16qi_v16qi
22271 = build_function_type_list (V8HI_type_node,
22272 V16QI_type_node, V16QI_type_node,
22274 tree v4hi_ftype_v8qi_v8qi
22275 = build_function_type_list (V4HI_type_node,
22276 V8QI_type_node, V8QI_type_node,
22278 tree unsigned_ftype_unsigned_uchar
22279 = build_function_type_list (unsigned_type_node,
22280 unsigned_type_node,
22281 unsigned_char_type_node,
22283 tree unsigned_ftype_unsigned_ushort
22284 = build_function_type_list (unsigned_type_node,
22285 unsigned_type_node,
22286 short_unsigned_type_node,
22288 tree unsigned_ftype_unsigned_unsigned
22289 = build_function_type_list (unsigned_type_node,
22290 unsigned_type_node,
22291 unsigned_type_node,
22293 tree uint64_ftype_uint64_uint64
22294 = build_function_type_list (long_long_unsigned_type_node,
22295 long_long_unsigned_type_node,
22296 long_long_unsigned_type_node,
22298 tree float_ftype_float
22299 = build_function_type_list (float_type_node,
22304 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22306 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22308 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22310 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22312 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22314 tree v8sf_ftype_v8sf
22315 = build_function_type_list (V8SF_type_node,
22318 tree v8si_ftype_v8sf
22319 = build_function_type_list (V8SI_type_node,
22322 tree v8sf_ftype_v8si
22323 = build_function_type_list (V8SF_type_node,
22326 tree v4si_ftype_v4df
22327 = build_function_type_list (V4SI_type_node,
22330 tree v4df_ftype_v4df
22331 = build_function_type_list (V4DF_type_node,
22334 tree v4df_ftype_v4si
22335 = build_function_type_list (V4DF_type_node,
22338 tree v4df_ftype_v4sf
22339 = build_function_type_list (V4DF_type_node,
22342 tree v4sf_ftype_v4df
22343 = build_function_type_list (V4SF_type_node,
22346 tree v8sf_ftype_v8sf_v8sf
22347 = build_function_type_list (V8SF_type_node,
22348 V8SF_type_node, V8SF_type_node,
22350 tree v4df_ftype_v4df_v4df
22351 = build_function_type_list (V4DF_type_node,
22352 V4DF_type_node, V4DF_type_node,
22354 tree v8sf_ftype_v8sf_int
22355 = build_function_type_list (V8SF_type_node,
22356 V8SF_type_node, integer_type_node,
22358 tree v4si_ftype_v8si_int
22359 = build_function_type_list (V4SI_type_node,
22360 V8SI_type_node, integer_type_node,
22362 tree v4df_ftype_v4df_int
22363 = build_function_type_list (V4DF_type_node,
22364 V4DF_type_node, integer_type_node,
22366 tree v4sf_ftype_v8sf_int
22367 = build_function_type_list (V4SF_type_node,
22368 V8SF_type_node, integer_type_node,
22370 tree v2df_ftype_v4df_int
22371 = build_function_type_list (V2DF_type_node,
22372 V4DF_type_node, integer_type_node,
22374 tree v8sf_ftype_v8sf_v8sf_int
22375 = build_function_type_list (V8SF_type_node,
22376 V8SF_type_node, V8SF_type_node,
22379 tree v8sf_ftype_v8sf_v8sf_v8sf
22380 = build_function_type_list (V8SF_type_node,
22381 V8SF_type_node, V8SF_type_node,
22384 tree v4df_ftype_v4df_v4df_v4df
22385 = build_function_type_list (V4DF_type_node,
22386 V4DF_type_node, V4DF_type_node,
22389 tree v8si_ftype_v8si_v8si_int
22390 = build_function_type_list (V8SI_type_node,
22391 V8SI_type_node, V8SI_type_node,
22394 tree v4df_ftype_v4df_v4df_int
22395 = build_function_type_list (V4DF_type_node,
22396 V4DF_type_node, V4DF_type_node,
22399 tree v8sf_ftype_pcfloat
22400 = build_function_type_list (V8SF_type_node,
22403 tree v4df_ftype_pcdouble
22404 = build_function_type_list (V4DF_type_node,
22405 pcdouble_type_node,
22407 tree pcv4sf_type_node
22408 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22409 tree pcv2df_type_node
22410 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22411 tree v8sf_ftype_pcv4sf
22412 = build_function_type_list (V8SF_type_node,
22415 tree v4df_ftype_pcv2df
22416 = build_function_type_list (V4DF_type_node,
22419 tree v32qi_ftype_pcchar
22420 = build_function_type_list (V32QI_type_node,
22423 tree void_ftype_pchar_v32qi
22424 = build_function_type_list (void_type_node,
22425 pchar_type_node, V32QI_type_node,
22427 tree v8si_ftype_v8si_v4si_int
22428 = build_function_type_list (V8SI_type_node,
22429 V8SI_type_node, V4SI_type_node,
22432 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22433 tree void_ftype_pv4di_v4di
22434 = build_function_type_list (void_type_node,
22435 pv4di_type_node, V4DI_type_node,
22437 tree v8sf_ftype_v8sf_v4sf_int
22438 = build_function_type_list (V8SF_type_node,
22439 V8SF_type_node, V4SF_type_node,
22442 tree v4df_ftype_v4df_v2df_int
22443 = build_function_type_list (V4DF_type_node,
22444 V4DF_type_node, V2DF_type_node,
22447 tree void_ftype_pfloat_v8sf
22448 = build_function_type_list (void_type_node,
22449 pfloat_type_node, V8SF_type_node,
22451 tree void_ftype_pdouble_v4df
22452 = build_function_type_list (void_type_node,
22453 pdouble_type_node, V4DF_type_node,
22455 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22456 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22457 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22458 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22459 tree pcv8sf_type_node
22460 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22461 tree pcv4df_type_node
22462 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22463 tree v8sf_ftype_pcv8sf_v8sf
22464 = build_function_type_list (V8SF_type_node,
22465 pcv8sf_type_node, V8SF_type_node,
22467 tree v4df_ftype_pcv4df_v4df
22468 = build_function_type_list (V4DF_type_node,
22469 pcv4df_type_node, V4DF_type_node,
22471 tree v4sf_ftype_pcv4sf_v4sf
22472 = build_function_type_list (V4SF_type_node,
22473 pcv4sf_type_node, V4SF_type_node,
22475 tree v2df_ftype_pcv2df_v2df
22476 = build_function_type_list (V2DF_type_node,
22477 pcv2df_type_node, V2DF_type_node,
22479 tree void_ftype_pv8sf_v8sf_v8sf
22480 = build_function_type_list (void_type_node,
22481 pv8sf_type_node, V8SF_type_node,
22484 tree void_ftype_pv4df_v4df_v4df
22485 = build_function_type_list (void_type_node,
22486 pv4df_type_node, V4DF_type_node,
22489 tree void_ftype_pv4sf_v4sf_v4sf
22490 = build_function_type_list (void_type_node,
22491 pv4sf_type_node, V4SF_type_node,
22494 tree void_ftype_pv2df_v2df_v2df
22495 = build_function_type_list (void_type_node,
22496 pv2df_type_node, V2DF_type_node,
22499 tree v4df_ftype_v2df
22500 = build_function_type_list (V4DF_type_node,
22503 tree v8sf_ftype_v4sf
22504 = build_function_type_list (V8SF_type_node,
22507 tree v8si_ftype_v4si
22508 = build_function_type_list (V8SI_type_node,
22511 tree v2df_ftype_v4df
22512 = build_function_type_list (V2DF_type_node,
22515 tree v4sf_ftype_v8sf
22516 = build_function_type_list (V4SF_type_node,
22519 tree v4si_ftype_v8si
22520 = build_function_type_list (V4SI_type_node,
22523 tree int_ftype_v4df
22524 = build_function_type_list (integer_type_node,
22527 tree int_ftype_v8sf
22528 = build_function_type_list (integer_type_node,
22531 tree int_ftype_v8sf_v8sf
22532 = build_function_type_list (integer_type_node,
22533 V8SF_type_node, V8SF_type_node,
22535 tree int_ftype_v4di_v4di
22536 = build_function_type_list (integer_type_node,
22537 V4DI_type_node, V4DI_type_node,
22539 tree int_ftype_v4df_v4df
22540 = build_function_type_list (integer_type_node,
22541 V4DF_type_node, V4DF_type_node,
22543 tree v8sf_ftype_v8sf_v8si
22544 = build_function_type_list (V8SF_type_node,
22545 V8SF_type_node, V8SI_type_node,
22547 tree v4df_ftype_v4df_v4di
22548 = build_function_type_list (V4DF_type_node,
22549 V4DF_type_node, V4DI_type_node,
22551 tree v4sf_ftype_v4sf_v4si
22552 = build_function_type_list (V4SF_type_node,
22553 V4SF_type_node, V4SI_type_node, NULL_TREE);
22554 tree v2df_ftype_v2df_v2di
22555 = build_function_type_list (V2DF_type_node,
22556 V2DF_type_node, V2DI_type_node, NULL_TREE);
22560 /* Add all special builtins with variable number of operands. */
22561 for (i = 0, d = bdesc_special_args;
22562 i < ARRAY_SIZE (bdesc_special_args);
22570 switch ((enum ix86_special_builtin_type) d->flag)
22572 case VOID_FTYPE_VOID:
22573 type = void_ftype_void;
22575 case V32QI_FTYPE_PCCHAR:
22576 type = v32qi_ftype_pcchar;
22578 case V16QI_FTYPE_PCCHAR:
22579 type = v16qi_ftype_pcchar;
22581 case V8SF_FTYPE_PCV4SF:
22582 type = v8sf_ftype_pcv4sf;
22584 case V8SF_FTYPE_PCFLOAT:
22585 type = v8sf_ftype_pcfloat;
22587 case V4DF_FTYPE_PCV2DF:
22588 type = v4df_ftype_pcv2df;
22590 case V4DF_FTYPE_PCDOUBLE:
22591 type = v4df_ftype_pcdouble;
22593 case V4SF_FTYPE_PCFLOAT:
22594 type = v4sf_ftype_pcfloat;
22596 case V2DI_FTYPE_PV2DI:
22597 type = v2di_ftype_pv2di;
22599 case V2DF_FTYPE_PCDOUBLE:
22600 type = v2df_ftype_pcdouble;
22602 case V8SF_FTYPE_PCV8SF_V8SF:
22603 type = v8sf_ftype_pcv8sf_v8sf;
22605 case V4DF_FTYPE_PCV4DF_V4DF:
22606 type = v4df_ftype_pcv4df_v4df;
22608 case V4SF_FTYPE_V4SF_PCV2SF:
22609 type = v4sf_ftype_v4sf_pcv2sf;
22611 case V4SF_FTYPE_PCV4SF_V4SF:
22612 type = v4sf_ftype_pcv4sf_v4sf;
22614 case V2DF_FTYPE_V2DF_PCDOUBLE:
22615 type = v2df_ftype_v2df_pcdouble;
22617 case V2DF_FTYPE_PCV2DF_V2DF:
22618 type = v2df_ftype_pcv2df_v2df;
22620 case VOID_FTYPE_PV2SF_V4SF:
22621 type = void_ftype_pv2sf_v4sf;
22623 case VOID_FTYPE_PV4DI_V4DI:
22624 type = void_ftype_pv4di_v4di;
22626 case VOID_FTYPE_PV2DI_V2DI:
22627 type = void_ftype_pv2di_v2di;
22629 case VOID_FTYPE_PCHAR_V32QI:
22630 type = void_ftype_pchar_v32qi;
22632 case VOID_FTYPE_PCHAR_V16QI:
22633 type = void_ftype_pchar_v16qi;
22635 case VOID_FTYPE_PFLOAT_V8SF:
22636 type = void_ftype_pfloat_v8sf;
22638 case VOID_FTYPE_PFLOAT_V4SF:
22639 type = void_ftype_pfloat_v4sf;
22641 case VOID_FTYPE_PDOUBLE_V4DF:
22642 type = void_ftype_pdouble_v4df;
22644 case VOID_FTYPE_PDOUBLE_V2DF:
22645 type = void_ftype_pdouble_v2df;
22647 case VOID_FTYPE_PDI_DI:
22648 type = void_ftype_pdi_di;
22650 case VOID_FTYPE_PINT_INT:
22651 type = void_ftype_pint_int;
22653 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22654 type = void_ftype_pv8sf_v8sf_v8sf;
22656 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22657 type = void_ftype_pv4df_v4df_v4df;
22659 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22660 type = void_ftype_pv4sf_v4sf_v4sf;
22662 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22663 type = void_ftype_pv2df_v2df_v2df;
22666 gcc_unreachable ();
22669 def_builtin (d->mask, d->name, type, d->code);
22672 /* Add all builtins with variable number of operands. */
22673 for (i = 0, d = bdesc_args;
22674 i < ARRAY_SIZE (bdesc_args);
22682 switch ((enum ix86_builtin_type) d->flag)
22684 case FLOAT_FTYPE_FLOAT:
22685 type = float_ftype_float;
22687 case INT_FTYPE_V8SF_V8SF_PTEST:
22688 type = int_ftype_v8sf_v8sf;
22690 case INT_FTYPE_V4DI_V4DI_PTEST:
22691 type = int_ftype_v4di_v4di;
22693 case INT_FTYPE_V4DF_V4DF_PTEST:
22694 type = int_ftype_v4df_v4df;
22696 case INT_FTYPE_V4SF_V4SF_PTEST:
22697 type = int_ftype_v4sf_v4sf;
22699 case INT_FTYPE_V2DI_V2DI_PTEST:
22700 type = int_ftype_v2di_v2di;
22702 case INT_FTYPE_V2DF_V2DF_PTEST:
22703 type = int_ftype_v2df_v2df;
22705 case INT64_FTYPE_V4SF:
22706 type = int64_ftype_v4sf;
22708 case INT64_FTYPE_V2DF:
22709 type = int64_ftype_v2df;
22711 case INT_FTYPE_V16QI:
22712 type = int_ftype_v16qi;
22714 case INT_FTYPE_V8QI:
22715 type = int_ftype_v8qi;
22717 case INT_FTYPE_V8SF:
22718 type = int_ftype_v8sf;
22720 case INT_FTYPE_V4DF:
22721 type = int_ftype_v4df;
22723 case INT_FTYPE_V4SF:
22724 type = int_ftype_v4sf;
22726 case INT_FTYPE_V2DF:
22727 type = int_ftype_v2df;
22729 case V16QI_FTYPE_V16QI:
22730 type = v16qi_ftype_v16qi;
22732 case V8SI_FTYPE_V8SF:
22733 type = v8si_ftype_v8sf;
22735 case V8SI_FTYPE_V4SI:
22736 type = v8si_ftype_v4si;
22738 case V8HI_FTYPE_V8HI:
22739 type = v8hi_ftype_v8hi;
22741 case V8HI_FTYPE_V16QI:
22742 type = v8hi_ftype_v16qi;
22744 case V8QI_FTYPE_V8QI:
22745 type = v8qi_ftype_v8qi;
22747 case V8SF_FTYPE_V8SF:
22748 type = v8sf_ftype_v8sf;
22750 case V8SF_FTYPE_V8SI:
22751 type = v8sf_ftype_v8si;
22753 case V8SF_FTYPE_V4SF:
22754 type = v8sf_ftype_v4sf;
22756 case V4SI_FTYPE_V4DF:
22757 type = v4si_ftype_v4df;
22759 case V4SI_FTYPE_V4SI:
22760 type = v4si_ftype_v4si;
22762 case V4SI_FTYPE_V16QI:
22763 type = v4si_ftype_v16qi;
22765 case V4SI_FTYPE_V8SI:
22766 type = v4si_ftype_v8si;
22768 case V4SI_FTYPE_V8HI:
22769 type = v4si_ftype_v8hi;
22771 case V4SI_FTYPE_V4SF:
22772 type = v4si_ftype_v4sf;
22774 case V4SI_FTYPE_V2DF:
22775 type = v4si_ftype_v2df;
22777 case V4HI_FTYPE_V4HI:
22778 type = v4hi_ftype_v4hi;
22780 case V4DF_FTYPE_V4DF:
22781 type = v4df_ftype_v4df;
22783 case V4DF_FTYPE_V4SI:
22784 type = v4df_ftype_v4si;
22786 case V4DF_FTYPE_V4SF:
22787 type = v4df_ftype_v4sf;
22789 case V4DF_FTYPE_V2DF:
22790 type = v4df_ftype_v2df;
22792 case V4SF_FTYPE_V4SF:
22793 case V4SF_FTYPE_V4SF_VEC_MERGE:
22794 type = v4sf_ftype_v4sf;
22796 case V4SF_FTYPE_V8SF:
22797 type = v4sf_ftype_v8sf;
22799 case V4SF_FTYPE_V4SI:
22800 type = v4sf_ftype_v4si;
22802 case V4SF_FTYPE_V4DF:
22803 type = v4sf_ftype_v4df;
22805 case V4SF_FTYPE_V2DF:
22806 type = v4sf_ftype_v2df;
22808 case V2DI_FTYPE_V2DI:
22809 type = v2di_ftype_v2di;
22811 case V2DI_FTYPE_V16QI:
22812 type = v2di_ftype_v16qi;
22814 case V2DI_FTYPE_V8HI:
22815 type = v2di_ftype_v8hi;
22817 case V2DI_FTYPE_V4SI:
22818 type = v2di_ftype_v4si;
22820 case V2SI_FTYPE_V2SI:
22821 type = v2si_ftype_v2si;
22823 case V2SI_FTYPE_V4SF:
22824 type = v2si_ftype_v4sf;
22826 case V2SI_FTYPE_V2DF:
22827 type = v2si_ftype_v2df;
22829 case V2SI_FTYPE_V2SF:
22830 type = v2si_ftype_v2sf;
22832 case V2DF_FTYPE_V4DF:
22833 type = v2df_ftype_v4df;
22835 case V2DF_FTYPE_V4SF:
22836 type = v2df_ftype_v4sf;
22838 case V2DF_FTYPE_V2DF:
22839 case V2DF_FTYPE_V2DF_VEC_MERGE:
22840 type = v2df_ftype_v2df;
22842 case V2DF_FTYPE_V2SI:
22843 type = v2df_ftype_v2si;
22845 case V2DF_FTYPE_V4SI:
22846 type = v2df_ftype_v4si;
22848 case V2SF_FTYPE_V2SF:
22849 type = v2sf_ftype_v2sf;
22851 case V2SF_FTYPE_V2SI:
22852 type = v2sf_ftype_v2si;
22854 case V16QI_FTYPE_V16QI_V16QI:
22855 type = v16qi_ftype_v16qi_v16qi;
22857 case V16QI_FTYPE_V8HI_V8HI:
22858 type = v16qi_ftype_v8hi_v8hi;
22860 case V8QI_FTYPE_V8QI_V8QI:
22861 type = v8qi_ftype_v8qi_v8qi;
22863 case V8QI_FTYPE_V4HI_V4HI:
22864 type = v8qi_ftype_v4hi_v4hi;
22866 case V8HI_FTYPE_V8HI_V8HI:
22867 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22868 type = v8hi_ftype_v8hi_v8hi;
22870 case V8HI_FTYPE_V16QI_V16QI:
22871 type = v8hi_ftype_v16qi_v16qi;
22873 case V8HI_FTYPE_V4SI_V4SI:
22874 type = v8hi_ftype_v4si_v4si;
22876 case V8HI_FTYPE_V8HI_SI_COUNT:
22877 type = v8hi_ftype_v8hi_int;
22879 case V8SF_FTYPE_V8SF_V8SF:
22880 type = v8sf_ftype_v8sf_v8sf;
22882 case V8SF_FTYPE_V8SF_V8SI:
22883 type = v8sf_ftype_v8sf_v8si;
22885 case V4SI_FTYPE_V4SI_V4SI:
22886 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22887 type = v4si_ftype_v4si_v4si;
22889 case V4SI_FTYPE_V8HI_V8HI:
22890 type = v4si_ftype_v8hi_v8hi;
22892 case V4SI_FTYPE_V4SF_V4SF:
22893 type = v4si_ftype_v4sf_v4sf;
22895 case V4SI_FTYPE_V2DF_V2DF:
22896 type = v4si_ftype_v2df_v2df;
22898 case V4SI_FTYPE_V4SI_SI_COUNT:
22899 type = v4si_ftype_v4si_int;
22901 case V4HI_FTYPE_V4HI_V4HI:
22902 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22903 type = v4hi_ftype_v4hi_v4hi;
22905 case V4HI_FTYPE_V8QI_V8QI:
22906 type = v4hi_ftype_v8qi_v8qi;
22908 case V4HI_FTYPE_V2SI_V2SI:
22909 type = v4hi_ftype_v2si_v2si;
22911 case V4HI_FTYPE_V4HI_SI_COUNT:
22912 type = v4hi_ftype_v4hi_int;
22914 case V4DF_FTYPE_V4DF_V4DF:
22915 type = v4df_ftype_v4df_v4df;
22917 case V4DF_FTYPE_V4DF_V4DI:
22918 type = v4df_ftype_v4df_v4di;
22920 case V4SF_FTYPE_V4SF_V4SF:
22921 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22922 type = v4sf_ftype_v4sf_v4sf;
22924 case V4SF_FTYPE_V4SF_V4SI:
22925 type = v4sf_ftype_v4sf_v4si;
22927 case V4SF_FTYPE_V4SF_V2SI:
22928 type = v4sf_ftype_v4sf_v2si;
22930 case V4SF_FTYPE_V4SF_V2DF:
22931 type = v4sf_ftype_v4sf_v2df;
22933 case V4SF_FTYPE_V4SF_DI:
22934 type = v4sf_ftype_v4sf_int64;
22936 case V4SF_FTYPE_V4SF_SI:
22937 type = v4sf_ftype_v4sf_int;
22939 case V2DI_FTYPE_V2DI_V2DI:
22940 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22941 type = v2di_ftype_v2di_v2di;
22943 case V2DI_FTYPE_V16QI_V16QI:
22944 type = v2di_ftype_v16qi_v16qi;
22946 case V2DI_FTYPE_V4SI_V4SI:
22947 type = v2di_ftype_v4si_v4si;
22949 case V2DI_FTYPE_V2DI_V16QI:
22950 type = v2di_ftype_v2di_v16qi;
22952 case V2DI_FTYPE_V2DF_V2DF:
22953 type = v2di_ftype_v2df_v2df;
22955 case V2DI_FTYPE_V2DI_SI_COUNT:
22956 type = v2di_ftype_v2di_int;
22958 case V2SI_FTYPE_V2SI_V2SI:
22959 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22960 type = v2si_ftype_v2si_v2si;
22962 case V2SI_FTYPE_V4HI_V4HI:
22963 type = v2si_ftype_v4hi_v4hi;
22965 case V2SI_FTYPE_V2SF_V2SF:
22966 type = v2si_ftype_v2sf_v2sf;
22968 case V2SI_FTYPE_V2SI_SI_COUNT:
22969 type = v2si_ftype_v2si_int;
22971 case V2DF_FTYPE_V2DF_V2DF:
22972 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22973 type = v2df_ftype_v2df_v2df;
22975 case V2DF_FTYPE_V2DF_V4SF:
22976 type = v2df_ftype_v2df_v4sf;
22978 case V2DF_FTYPE_V2DF_V2DI:
22979 type = v2df_ftype_v2df_v2di;
22981 case V2DF_FTYPE_V2DF_DI:
22982 type = v2df_ftype_v2df_int64;
22984 case V2DF_FTYPE_V2DF_SI:
22985 type = v2df_ftype_v2df_int;
22987 case V2SF_FTYPE_V2SF_V2SF:
22988 type = v2sf_ftype_v2sf_v2sf;
22990 case V1DI_FTYPE_V1DI_V1DI:
22991 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22992 type = v1di_ftype_v1di_v1di;
22994 case V1DI_FTYPE_V8QI_V8QI:
22995 type = v1di_ftype_v8qi_v8qi;
22997 case V1DI_FTYPE_V2SI_V2SI:
22998 type = v1di_ftype_v2si_v2si;
23000 case V1DI_FTYPE_V1DI_SI_COUNT:
23001 type = v1di_ftype_v1di_int;
23003 case UINT64_FTYPE_UINT64_UINT64:
23004 type = uint64_ftype_uint64_uint64;
23006 case UINT_FTYPE_UINT_UINT:
23007 type = unsigned_ftype_unsigned_unsigned;
23009 case UINT_FTYPE_UINT_USHORT:
23010 type = unsigned_ftype_unsigned_ushort;
23012 case UINT_FTYPE_UINT_UCHAR:
23013 type = unsigned_ftype_unsigned_uchar;
23015 case V8HI_FTYPE_V8HI_INT:
23016 type = v8hi_ftype_v8hi_int;
23018 case V8SF_FTYPE_V8SF_INT:
23019 type = v8sf_ftype_v8sf_int;
23021 case V4SI_FTYPE_V4SI_INT:
23022 type = v4si_ftype_v4si_int;
23024 case V4SI_FTYPE_V8SI_INT:
23025 type = v4si_ftype_v8si_int;
23027 case V4HI_FTYPE_V4HI_INT:
23028 type = v4hi_ftype_v4hi_int;
23030 case V4DF_FTYPE_V4DF_INT:
23031 type = v4df_ftype_v4df_int;
23033 case V4SF_FTYPE_V4SF_INT:
23034 type = v4sf_ftype_v4sf_int;
23036 case V4SF_FTYPE_V8SF_INT:
23037 type = v4sf_ftype_v8sf_int;
23039 case V2DI_FTYPE_V2DI_INT:
23040 case V2DI2TI_FTYPE_V2DI_INT:
23041 type = v2di_ftype_v2di_int;
23043 case V2DF_FTYPE_V2DF_INT:
23044 type = v2df_ftype_v2df_int;
23046 case V2DF_FTYPE_V4DF_INT:
23047 type = v2df_ftype_v4df_int;
23049 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23050 type = v16qi_ftype_v16qi_v16qi_v16qi;
23052 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23053 type = v8sf_ftype_v8sf_v8sf_v8sf;
23055 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23056 type = v4df_ftype_v4df_v4df_v4df;
23058 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23059 type = v4sf_ftype_v4sf_v4sf_v4sf;
23061 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23062 type = v2df_ftype_v2df_v2df_v2df;
23064 case V16QI_FTYPE_V16QI_V16QI_INT:
23065 type = v16qi_ftype_v16qi_v16qi_int;
23067 case V8SI_FTYPE_V8SI_V8SI_INT:
23068 type = v8si_ftype_v8si_v8si_int;
23070 case V8SI_FTYPE_V8SI_V4SI_INT:
23071 type = v8si_ftype_v8si_v4si_int;
23073 case V8HI_FTYPE_V8HI_V8HI_INT:
23074 type = v8hi_ftype_v8hi_v8hi_int;
23076 case V8SF_FTYPE_V8SF_V8SF_INT:
23077 type = v8sf_ftype_v8sf_v8sf_int;
23079 case V8SF_FTYPE_V8SF_V4SF_INT:
23080 type = v8sf_ftype_v8sf_v4sf_int;
23082 case V4SI_FTYPE_V4SI_V4SI_INT:
23083 type = v4si_ftype_v4si_v4si_int;
23085 case V4DF_FTYPE_V4DF_V4DF_INT:
23086 type = v4df_ftype_v4df_v4df_int;
23088 case V4DF_FTYPE_V4DF_V2DF_INT:
23089 type = v4df_ftype_v4df_v2df_int;
23091 case V4SF_FTYPE_V4SF_V4SF_INT:
23092 type = v4sf_ftype_v4sf_v4sf_int;
23094 case V2DI_FTYPE_V2DI_V2DI_INT:
23095 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23096 type = v2di_ftype_v2di_v2di_int;
23098 case V2DF_FTYPE_V2DF_V2DF_INT:
23099 type = v2df_ftype_v2df_v2df_int;
23101 case V2DI_FTYPE_V2DI_UINT_UINT:
23102 type = v2di_ftype_v2di_unsigned_unsigned;
23104 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23105 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23107 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23108 type = v1di_ftype_v1di_v1di_int;
23111 gcc_unreachable ();
23114 def_builtin_const (d->mask, d->name, type, d->code);
23117 /* pcmpestr[im] insns. */
23118 for (i = 0, d = bdesc_pcmpestr;
23119 i < ARRAY_SIZE (bdesc_pcmpestr);
23122 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23123 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23125 ftype = int_ftype_v16qi_int_v16qi_int_int;
23126 def_builtin_const (d->mask, d->name, ftype, d->code);
23129 /* pcmpistr[im] insns. */
23130 for (i = 0, d = bdesc_pcmpistr;
23131 i < ARRAY_SIZE (bdesc_pcmpistr);
23134 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23135 ftype = v16qi_ftype_v16qi_v16qi_int;
23137 ftype = int_ftype_v16qi_v16qi_int;
23138 def_builtin_const (d->mask, d->name, ftype, d->code);
23141 /* comi/ucomi insns. */
23142 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23143 if (d->mask == OPTION_MASK_ISA_SSE2)
23144 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23146 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23149 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23150 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23152 /* SSE or 3DNow!A */
23153 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23156 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23158 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23159 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23162 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23163 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23166 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23167 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23168 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23169 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23170 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23171 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23174 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23177 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23178 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23180 /* Access to the vec_init patterns. */
23181 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23182 integer_type_node, NULL_TREE);
23183 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23185 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23186 short_integer_type_node,
23187 short_integer_type_node,
23188 short_integer_type_node, NULL_TREE);
23189 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23191 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23192 char_type_node, char_type_node,
23193 char_type_node, char_type_node,
23194 char_type_node, char_type_node,
23195 char_type_node, NULL_TREE);
23196 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23198 /* Access to the vec_extract patterns. */
23199 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23200 integer_type_node, NULL_TREE);
23201 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23203 ftype = build_function_type_list (long_long_integer_type_node,
23204 V2DI_type_node, integer_type_node,
23206 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23208 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23209 integer_type_node, NULL_TREE);
23210 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23212 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23213 integer_type_node, NULL_TREE);
23214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23216 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23217 integer_type_node, NULL_TREE);
23218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23220 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23221 integer_type_node, NULL_TREE);
23222 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23224 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23225 integer_type_node, NULL_TREE);
23226 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23228 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23229 integer_type_node, NULL_TREE);
23230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23232 /* Access to the vec_set patterns. */
23233 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23235 integer_type_node, NULL_TREE);
23236 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23238 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23240 integer_type_node, NULL_TREE);
23241 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23243 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23245 integer_type_node, NULL_TREE);
23246 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23248 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23250 integer_type_node, NULL_TREE);
23251 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23253 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23255 integer_type_node, NULL_TREE);
23256 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23258 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23260 integer_type_node, NULL_TREE);
23261 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23263 /* Add SSE5 multi-arg argument instructions */
23264 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23266 tree mtype = NULL_TREE;
23271 switch ((enum multi_arg_type)d->flag)
23273 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23274 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23275 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23276 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23277 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23278 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23279 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23280 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23281 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23282 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23283 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23284 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23285 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23286 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23287 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23288 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23289 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23290 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23291 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23292 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23293 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23294 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23295 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23296 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23297 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23298 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23299 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23300 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23301 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23302 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23303 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23304 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23305 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23306 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23307 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23308 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23309 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23310 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23311 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23312 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23313 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23314 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23315 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23316 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23317 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23318 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23319 case MULTI_ARG_UNKNOWN:
23321 gcc_unreachable ();
23325 def_builtin_const (d->mask, d->name, mtype, d->code);
23329 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific va_start/va_end/va_copy builtins:
   __builtin_ms_va_* typed against the ms_abi va_list and tagged with the
   "ms_abi" attribute, and __builtin_sysv_va_* typed against the sysv_abi
   va_list and tagged with "sysv_abi".  Each maps onto the generic
   BUILT_IN_VA_START / BUILT_IN_VA_END / BUILT_IN_VA_COPY codes.  */
23332 ix86_init_builtins_va_builtins_abi (void)
23334 tree ms_va_ref, sysv_va_ref;
23335 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23336 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23337 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23338 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists naming the calling convention each builtin belongs to.  */
23342 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23343 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference/pointer types to the two va_list flavors, used as the
   by-reference parameter type of the va_* builtins.  */
23344 ms_va_ref = build_reference_type (ms_va_list_type_node);
23346 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end/va_copy are fixed-arity, va_start is varargs.  */
23349 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23350 fnvoid_va_start_ms =
23351 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23352 fnvoid_va_end_sysv =
23353 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23354 fnvoid_va_start_sysv =
23355 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23357 fnvoid_va_copy_ms =
23358 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23360 fnvoid_va_copy_sysv =
23361 build_function_type_list (void_type_node, sysv_va_ref,
23362 sysv_va_ref, NULL_TREE);
/* Register all six builtins; the NULL library name means there is no
   fallback library routine.  */
23364 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23365 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23366 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23367 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23368 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23369 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23370 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23371 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23372 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23373 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23374 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23375 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level TARGET_INIT_BUILTINS hook for i386: registers the extended
   float types (__float80, __float128) and the TFmode builtins
   (__builtin_infq, __builtin_huge_valq, __builtin_fabsq,
   __builtin_copysignq), then delegates to ix86_init_mmx_sse_builtins and
   the per-ABI va_* builtin setup.  */
23379 ix86_init_builtins (void)
23381 tree float128_type_node = make_node (REAL_TYPE);
23384 /* The __float80 type. */
/* When long double is already the 80-bit XFmode type, just expose it
   under the __float80 name; otherwise build a distinct 80-bit REAL_TYPE.  */
23385 if (TYPE_MODE (long_double_type_node) == XFmode)
23386 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23390 /* The __float80 type. */
23391 tree float80_type_node = make_node (REAL_TYPE);
23393 TYPE_PRECISION (float80_type_node) = 80;
23394 layout_type (float80_type_node);
23395 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23399 /* The __float128 type. */
23400 TYPE_PRECISION (float128_type_node) = 128;
23401 layout_type (float128_type_node);
23402 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23405 /* TFmode support builtins. */
/* __builtin_infq / __builtin_huge_valq: nullary functions returning
   __float128.  */
23406 ftype = build_function_type (float128_type_node, void_list_node);
23407 decl = add_builtin_function ("__builtin_infq", ftype,
23408 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23410 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23412 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23413 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23415 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23417 /* We will expand them to normal call if SSE2 isn't available since
23418 they are used by libgcc. */
/* __builtin_fabsq maps onto the __fabstf2 library routine and is marked
   const (TREE_READONLY).  */
23419 ftype = build_function_type_list (float128_type_node,
23420 float128_type_node,
23422 decl = add_builtin_function ("__builtin_fabsq", ftype,
23423 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23424 "__fabstf2", NULL_TREE);
23425 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23426 TREE_READONLY (decl) = 1;
/* __builtin_copysignq maps onto __copysigntf3, likewise const.  */
23428 ftype = build_function_type_list (float128_type_node,
23429 float128_type_node,
23430 float128_type_node,
23432 decl = add_builtin_function ("__builtin_copysignq", ftype,
23433 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23434 "__copysigntf3", NULL_TREE);
23435 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23436 TREE_READONLY (decl) = 1;
/* Register the large MMX/SSE/AVX builtin tables, then the ABI-specific
   va_* builtins.  */
23438 ix86_init_mmx_sse_builtins ();
23440 ix86_init_builtins_va_builtins_abi ();
23443 /* Errors in the source file can cause expand_expr to return const0_rtx
23444 where we expect a vector. To avoid crashing, use one of the vector
23445 clear instructions. */
/* X: operand rtx to sanitize; MODE: the vector mode the insn expects.
   A scalar const0_rtx is replaced by the all-zero vector constant of
   MODE.  */
23447 safe_vector_operand (rtx x, enum machine_mode mode)
23449 if (x == const0_rtx)
23450 x = CONST0_RTX (mode);
23454 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE is the insn to emit, EXP the CALL_EXPR with two arguments, and
   TARGET a suggested destination (may be NULL or the wrong mode, in
   which case a fresh pseudo is used).  Returns via the caller's normal
   binop path after emitting ICODE(target, op0, op1).  */
23457 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23460 tree arg0 = CALL_EXPR_ARG (exp, 0);
23461 tree arg1 = CALL_EXPR_ARG (exp, 1);
23462 rtx op0 = expand_normal (arg0);
23463 rtx op1 = expand_normal (arg1);
23464 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23465 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23466 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-mode const0_rtx operands (see
   safe_vector_operand).  */
23468 if (VECTOR_MODE_P (mode0))
23469 op0 = safe_vector_operand (op0, mode0);
23470 if (VECTOR_MODE_P (mode1))
23471 op1 = safe_vector_operand (op1, mode1);
23473 if (optimize || !target
23474 || GET_MODE (target) != tmode
23475 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23476 target = gen_reg_rtx (tmode);
/* An SImode second operand where the insn wants TImode (e.g. whole-
   register shift counts): load the 32-bit value into a V4SI register
   with sse2_loadd and reinterpret the register as TImode.  */
23478 if (GET_MODE (op1) == SImode && mode1 == TImode)
23480 rtx x = gen_reg_rtx (V4SImode);
23481 emit_insn (gen_sse2_loadd (x, op1));
23482 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when they fail the insn's predicates.  */
23485 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23486 op0 = copy_to_mode_reg (mode0, op0);
23487 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23488 op1 = copy_to_mode_reg (mode1, op1);
23490 pat = GEN_FCN (icode) (target, op0, op1);
23499 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands an SSE5 multi-argument builtin.  M_TYPE classifies the builtin
   (operand count, whether the last argument is an immediate, whether the
   insn encodes a comparison), and SUB_CODE supplies the comparison or
   sub-opcode when one is embedded in the pattern.  */
23502 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23503 enum multi_arg_type m_type,
23504 enum insn_code sub_code)
23509 bool comparison_p = false;
23511 bool last_arg_constant = false;
23512 int num_memory = 0;
23515 enum machine_mode mode;
23518 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: the case groups below set the argument count and the
   comparison / immediate-last-argument flags.  */
23522 case MULTI_ARG_3_SF:
23523 case MULTI_ARG_3_DF:
23524 case MULTI_ARG_3_DI:
23525 case MULTI_ARG_3_SI:
23526 case MULTI_ARG_3_SI_DI:
23527 case MULTI_ARG_3_HI:
23528 case MULTI_ARG_3_HI_SI:
23529 case MULTI_ARG_3_QI:
23530 case MULTI_ARG_3_PERMPS:
23531 case MULTI_ARG_3_PERMPD:
23535 case MULTI_ARG_2_SF:
23536 case MULTI_ARG_2_DF:
23537 case MULTI_ARG_2_DI:
23538 case MULTI_ARG_2_SI:
23539 case MULTI_ARG_2_HI:
23540 case MULTI_ARG_2_QI:
/* *_IMM variants take an immediate as their final argument.  */
23544 case MULTI_ARG_2_DI_IMM:
23545 case MULTI_ARG_2_SI_IMM:
23546 case MULTI_ARG_2_HI_IMM:
23547 case MULTI_ARG_2_QI_IMM:
23549 last_arg_constant = true;
23552 case MULTI_ARG_1_SF:
23553 case MULTI_ARG_1_DF:
23554 case MULTI_ARG_1_DI:
23555 case MULTI_ARG_1_SI:
23556 case MULTI_ARG_1_HI:
23557 case MULTI_ARG_1_QI:
23558 case MULTI_ARG_1_SI_DI:
23559 case MULTI_ARG_1_HI_DI:
23560 case MULTI_ARG_1_HI_SI:
23561 case MULTI_ARG_1_QI_DI:
23562 case MULTI_ARG_1_QI_SI:
23563 case MULTI_ARG_1_QI_HI:
23564 case MULTI_ARG_1_PH2PS:
23565 case MULTI_ARG_1_PS2PH:
/* *_CMP variants embed a comparison rtx built from SUB_CODE.  */
23569 case MULTI_ARG_2_SF_CMP:
23570 case MULTI_ARG_2_DF_CMP:
23571 case MULTI_ARG_2_DI_CMP:
23572 case MULTI_ARG_2_SI_CMP:
23573 case MULTI_ARG_2_HI_CMP:
23574 case MULTI_ARG_2_QI_CMP:
23576 comparison_p = true;
23579 case MULTI_ARG_2_SF_TF:
23580 case MULTI_ARG_2_DF_TF:
23581 case MULTI_ARG_2_DI_TF:
23582 case MULTI_ARG_2_SI_TF:
23583 case MULTI_ARG_2_HI_TF:
23584 case MULTI_ARG_2_QI_TF:
23589 case MULTI_ARG_UNKNOWN:
23591 gcc_unreachable ();
23594 if (optimize || !target
23595 || GET_MODE (target) != tmode
23596 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23597 target = gen_reg_rtx (tmode);
23599 gcc_assert (nargs <= 4);
/* Expand each call argument into an rtx that satisfies the insn's
   operand predicate.  When the insn is a comparison, operand 1 is the
   comparison rtx, so argument i maps to insn operand i+2 (ADJUST).  */
23601 for (i = 0; i < nargs; i++)
23603 tree arg = CALL_EXPR_ARG (exp, i);
23604 rtx op = expand_normal (arg);
23605 int adjust = (comparison_p) ? 1 : 0;
23606 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* Immediate-last-argument builtins require a compile-time constant.  */
23608 if (last_arg_constant && i == nargs-1)
23610 if (GET_CODE (op) != CONST_INT)
23612 error ("last argument must be an immediate");
23613 return gen_reg_rtx (tmode);
23618 if (VECTOR_MODE_P (mode))
23619 op = safe_vector_operand (op, mode);
23621 /* If we aren't optimizing, only allow one memory operand to be
23623 if (memory_operand (op, mode))
23626 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23629 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23631 op = force_reg (mode, op);
23635 args[i].mode = mode;
/* Emit the pattern; the shape depends on the argument count and on
   whether a comparison/sub-opcode operand must be interposed.  */
23641 pat = GEN_FCN (icode) (target, args[0].op);
23646 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23647 GEN_INT ((int)sub_code));
23648 else if (! comparison_p)
23649 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23652 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23656 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23661 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23665 gcc_unreachable ();
23675 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23676 insns with vec_merge. */
/* Expands a one-argument builtin whose insn pattern also consumes the
   source vector as a merge operand: both op0 and op1 come from the same
   expanded argument.  */
23679 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23683 tree arg0 = CALL_EXPR_ARG (exp, 0);
23684 rtx op1, op0 = expand_normal (arg0);
23685 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23686 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23688 if (optimize || !target
23689 || GET_MODE (target) != tmode
23690 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23691 target = gen_reg_rtx (tmode);
23693 if (VECTOR_MODE_P (mode0))
23694 op0 = safe_vector_operand (op0, mode0);
23696 if ((optimize && !register_operand (op0, mode0))
23697 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23698 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is the merge operand; force it to a register if the predicate
   rejects it.  */
23701 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23702 op1 = copy_to_mode_reg (mode0, op1);
23704 pat = GEN_FCN (icode) (target, op0, op1);
23711 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin (icode plus the rtx comparison code), EXP is
   the two-argument CALL_EXPR, TARGET the suggested destination, and
   SWAP requests operand exchange for comparisons the hardware only
   implements in one direction.  */
23714 ix86_expand_sse_compare (const struct builtin_description *d,
23715 tree exp, rtx target, bool swap)
23718 tree arg0 = CALL_EXPR_ARG (exp, 0);
23719 tree arg1 = CALL_EXPR_ARG (exp, 1);
23720 rtx op0 = expand_normal (arg0);
23721 rtx op1 = expand_normal (arg1);
23723 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23724 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23725 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23726 enum rtx_code comparison = d->comparison;
23728 if (VECTOR_MODE_P (mode0))
23729 op0 = safe_vector_operand (op0, mode0);
23730 if (VECTOR_MODE_P (mode1))
23731 op1 = safe_vector_operand (op1, mode1);
23733 /* Swap operands if we have a comparison that isn't available in
/* op1 is copied through a fresh register before the exchange so the
   original operand rtx is not clobbered.  */
23737 rtx tmp = gen_reg_rtx (mode1);
23738 emit_move_insn (tmp, op1);
23743 if (optimize || !target
23744 || GET_MODE (target) != tmode
23745 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23746 target = gen_reg_rtx (tmode);
23748 if ((optimize && !register_operand (op0, mode0))
23749 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23750 op0 = copy_to_mode_reg (mode0, op0);
23751 if ((optimize && !register_operand (op1, mode1))
23752 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23753 op1 = copy_to_mode_reg (mode1, op1);
/* The insn takes the comparison rtx itself as a third source operand.  */
23755 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23756 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23763 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi-style builtin: the insn sets the flags, a setcc
   writes the QImode low part of a zero-initialized SImode pseudo, and
   the full SImode register is returned as the builtin's int result.  */
23766 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23770 tree arg0 = CALL_EXPR_ARG (exp, 0);
23771 tree arg1 = CALL_EXPR_ARG (exp, 1);
23772 rtx op0 = expand_normal (arg0);
23773 rtx op1 = expand_normal (arg1);
23774 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23775 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23776 enum rtx_code comparison = d->comparison;
23778 if (VECTOR_MODE_P (mode0))
23779 op0 = safe_vector_operand (op0, mode0);
23780 if (VECTOR_MODE_P (mode1))
23781 op1 = safe_vector_operand (op1, mode1);
23783 /* Swap operands if we have a comparison that isn't available in
23785 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the whole SImode result first so the setcc-written low byte
   yields a clean 0/1 int; work through a QImode subreg of it.  */
23792 target = gen_reg_rtx (SImode);
23793 emit_move_insn (target, const0_rtx);
23794 target = gen_rtx_SUBREG (QImode, target, 0);
23796 if ((optimize && !register_operand (op0, mode0))
23797 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23798 op0 = copy_to_mode_reg (mode0, op0);
23799 if ((optimize && !register_operand (op1, mode1))
23800 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23801 op1 = copy_to_mode_reg (mode1, op1);
23803 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the comparison outcome into the low byte only, leaving the
   zeroed upper bits intact (STRICT_LOW_PART).  */
23807 emit_insn (gen_rtx_SET (VOIDmode,
23808 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23809 gen_rtx_fmt_ee (comparison, QImode,
/* Return the containing SImode register, not the QImode subreg.  */
23813 return SUBREG_REG (target);
23816 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin: emit the test pattern and read the
   requested flag (D->comparison) into the low byte of a zeroed SImode
   pseudo.  Structure parallels ix86_expand_sse_comi above.
   NOTE(review): partial excerpt — emit of PAT not visible here.  */
23819 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23823 tree arg0 = CALL_EXPR_ARG (exp, 0);
23824 tree arg1 = CALL_EXPR_ARG (exp, 1);
23825 rtx op0 = expand_normal (arg0);
23826 rtx op1 = expand_normal (arg1);
23827 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23828 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23829 enum rtx_code comparison = d->comparison;
23831 if (VECTOR_MODE_P (mode0))
23832 op0 = safe_vector_operand (op0, mode0);
23833 if (VECTOR_MODE_P (mode1))
23834 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result, then write only its low byte below.  */
23836 target = gen_reg_rtx (SImode);
23837 emit_move_insn (target, const0_rtx);
23838 target = gen_rtx_SUBREG (QImode, target, 0);
23840 if ((optimize && !register_operand (op0, mode0))
23841 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23842 op0 = copy_to_mode_reg (mode0, op0);
23843 if ((optimize && !register_operand (op1, mode1))
23844 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23845 op1 = copy_to_mode_reg (mode1, op1);
23847 pat = GEN_FCN (d->icode) (op0, op1);
23851 emit_insn (gen_rtx_SET (VOIDmode,
23852 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23853 gen_rtx_fmt_ee (comparison, QImode,
23857 return SUBREG_REG (target);
23860 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand SSE4.2 explicit-length string compare builtins.  The insn
   pattern has two outputs (index and mask); depending on D->code we keep
   one of them as TARGET and discard the other into a scratch, or — for
   the flag-reading variants — discard both and return the requested
   EFLAGS bit as an SImode value.
   NOTE(review): partial excerpt — error-return paths and the emit of
   PAT are not visible here.  */
23863 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23864 tree exp, rtx target)
23867 tree arg0 = CALL_EXPR_ARG (exp, 0);
23868 tree arg1 = CALL_EXPR_ARG (exp, 1);
23869 tree arg2 = CALL_EXPR_ARG (exp, 2);
23870 tree arg3 = CALL_EXPR_ARG (exp, 3);
23871 tree arg4 = CALL_EXPR_ARG (exp, 4);
23872 rtx scratch0, scratch1;
23873 rtx op0 = expand_normal (arg0);
23874 rtx op1 = expand_normal (arg1);
23875 rtx op2 = expand_normal (arg2);
23876 rtx op3 = expand_normal (arg3);
23877 rtx op4 = expand_normal (arg4);
/* Operand modes: two outputs, two vector/length input pairs, and the
   immediate control byte.  */
23878 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23880 tmode0 = insn_data[d->icode].operand[0].mode;
23881 tmode1 = insn_data[d->icode].operand[1].mode;
23882 modev2 = insn_data[d->icode].operand[2].mode;
23883 modei3 = insn_data[d->icode].operand[3].mode;
23884 modev4 = insn_data[d->icode].operand[4].mode;
23885 modei5 = insn_data[d->icode].operand[5].mode;
23886 modeimm = insn_data[d->icode].operand[6].mode;
23888 if (VECTOR_MODE_P (modev2))
23889 op0 = safe_vector_operand (op0, modev2);
23890 if (VECTOR_MODE_P (modev4))
23891 op2 = safe_vector_operand (op2, modev4);
23893 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23894 op0 = copy_to_mode_reg (modev2, op0);
23895 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23896 op1 = copy_to_mode_reg (modei3, op1);
23897 if ((optimize && !register_operand (op2, modev4))
23898 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23899 op2 = copy_to_mode_reg (modev4, op2);
23900 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23901 op3 = copy_to_mode_reg (modei5, op3);
/* The control operand must be an 8-bit immediate.  */
23903 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23905 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI returns the index output; the mask output goes to scratch.  */
23909 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23911 if (optimize || !target
23912 || GET_MODE (target) != tmode0
23913 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23914 target = gen_reg_rtx (tmode0);
23916 scratch1 = gen_reg_rtx (tmode1);
23918 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM returns the mask output; the index output goes to scratch.  */
23920 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23922 if (optimize || !target
23923 || GET_MODE (target) != tmode1
23924 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23925 target = gen_reg_rtx (tmode1);
23927 scratch0 = gen_reg_rtx (tmode0);
23929 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants: both insn outputs are discarded.  */
23933 gcc_assert (d->flag);
23935 scratch0 = gen_reg_rtx (tmode0);
23936 scratch1 = gen_reg_rtx (tmode1);
23938 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Read the EFLAGS bit named by D->flag into the low byte of a zeroed
   SImode pseudo and return that register.  */
23948 target = gen_reg_rtx (SImode);
23949 emit_move_insn (target, const0_rtx);
23950 target = gen_rtx_SUBREG (QImode, target, 0);
23953 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23954 gen_rtx_fmt_ee (EQ, QImode,
23955 gen_rtx_REG ((enum machine_mode) d->flag,
23958 return SUBREG_REG (target);
23965 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand SSE4.2 implicit-length string compare builtins.  Mirrors
   ix86_expand_sse_pcmpestr above but with only two vector inputs and
   the immediate control byte (no explicit lengths).
   NOTE(review): partial excerpt — error-return paths and the emit of
   PAT are not visible here.  */
23968 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23969 tree exp, rtx target)
23972 tree arg0 = CALL_EXPR_ARG (exp, 0);
23973 tree arg1 = CALL_EXPR_ARG (exp, 1);
23974 tree arg2 = CALL_EXPR_ARG (exp, 2);
23975 rtx scratch0, scratch1;
23976 rtx op0 = expand_normal (arg0);
23977 rtx op1 = expand_normal (arg1);
23978 rtx op2 = expand_normal (arg2);
23979 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23981 tmode0 = insn_data[d->icode].operand[0].mode;
23982 tmode1 = insn_data[d->icode].operand[1].mode;
23983 modev2 = insn_data[d->icode].operand[2].mode;
23984 modev3 = insn_data[d->icode].operand[3].mode;
23985 modeimm = insn_data[d->icode].operand[4].mode;
23987 if (VECTOR_MODE_P (modev2))
23988 op0 = safe_vector_operand (op0, modev2);
23989 if (VECTOR_MODE_P (modev3))
23990 op1 = safe_vector_operand (op1, modev3);
23992 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23993 op0 = copy_to_mode_reg (modev2, op0);
23994 if ((optimize && !register_operand (op1, modev3))
23995 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23996 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate.  */
23998 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24000 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI returns the index output; the mask output goes to scratch.  */
24004 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24006 if (optimize || !target
24007 || GET_MODE (target) != tmode0
24008 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24009 target = gen_reg_rtx (tmode0);
24011 scratch1 = gen_reg_rtx (tmode1);
24013 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM returns the mask output; the index output goes to scratch.  */
24015 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24017 if (optimize || !target
24018 || GET_MODE (target) != tmode1
24019 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24020 target = gen_reg_rtx (tmode1);
24022 scratch0 = gen_reg_rtx (tmode0);
24024 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: both insn outputs are discarded.  */
24028 gcc_assert (d->flag);
24030 scratch0 = gen_reg_rtx (tmode0);
24031 scratch1 = gen_reg_rtx (tmode1);
24033 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Read the EFLAGS bit named by D->flag into the low byte of a zeroed
   SImode pseudo and return that register.  */
24043 target = gen_reg_rtx (SImode);
24044 emit_move_insn (target, const0_rtx);
24045 target = gen_rtx_SUBREG (QImode, target, 0);
24048 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24049 gen_rtx_fmt_ee (EQ, QImode,
24050 gen_rtx_REG ((enum machine_mode) d->flag,
24053 return SUBREG_REG (target);
24059 /* Subroutine of ix86_expand_builtin to take care of insns with
24060 variable number of operands. */
/* Generic expander: classify the builtin's function type (D->flag),
   derive the argument count, immediate-operand count and any special
   handling, then load and constrain each argument and emit the insn.
   NOTE(review): partial excerpt — several case bodies, the `args' array
   declaration and some assignments are not visible here.  */
24063 ix86_expand_args_builtin (const struct builtin_description *d,
24064 tree exp, rtx target)
24066 rtx pat, real_target;
24067 unsigned int i, nargs;
24068 unsigned int nargs_constant = 0;
24069 int num_memory = 0;
24073 enum machine_mode mode;
24075 bool last_arg_count = false;
24076 enum insn_code icode = d->icode;
24077 const struct insn_data *insn_p = &insn_data[icode];
24078 enum machine_mode tmode = insn_p->operand[0].mode;
24079 enum machine_mode rmode = VOIDmode;
24081 enum rtx_code comparison = d->comparison;
/* Dispatch on the builtin's abstract function type.  PTEST types are
   delegated immediately; the remaining cases set nargs and friends.  */
24083 switch ((enum ix86_builtin_type) d->flag)
24085 case INT_FTYPE_V8SF_V8SF_PTEST:
24086 case INT_FTYPE_V4DI_V4DI_PTEST:
24087 case INT_FTYPE_V4DF_V4DF_PTEST:
24088 case INT_FTYPE_V4SF_V4SF_PTEST:
24089 case INT_FTYPE_V2DI_V2DI_PTEST:
24090 case INT_FTYPE_V2DF_V2DF_PTEST:
24091 return ix86_expand_sse_ptest (d, exp, target);
/* Unary builtins.  */
24092 case FLOAT128_FTYPE_FLOAT128:
24093 case FLOAT_FTYPE_FLOAT:
24094 case INT64_FTYPE_V4SF:
24095 case INT64_FTYPE_V2DF:
24096 case INT_FTYPE_V16QI:
24097 case INT_FTYPE_V8QI:
24098 case INT_FTYPE_V8SF:
24099 case INT_FTYPE_V4DF:
24100 case INT_FTYPE_V4SF:
24101 case INT_FTYPE_V2DF:
24102 case V16QI_FTYPE_V16QI:
24103 case V8SI_FTYPE_V8SF:
24104 case V8SI_FTYPE_V4SI:
24105 case V8HI_FTYPE_V8HI:
24106 case V8HI_FTYPE_V16QI:
24107 case V8QI_FTYPE_V8QI:
24108 case V8SF_FTYPE_V8SF:
24109 case V8SF_FTYPE_V8SI:
24110 case V8SF_FTYPE_V4SF:
24111 case V4SI_FTYPE_V4SI:
24112 case V4SI_FTYPE_V16QI:
24113 case V4SI_FTYPE_V4SF:
24114 case V4SI_FTYPE_V8SI:
24115 case V4SI_FTYPE_V8HI:
24116 case V4SI_FTYPE_V4DF:
24117 case V4SI_FTYPE_V2DF:
24118 case V4HI_FTYPE_V4HI:
24119 case V4DF_FTYPE_V4DF:
24120 case V4DF_FTYPE_V4SI:
24121 case V4DF_FTYPE_V4SF:
24122 case V4DF_FTYPE_V2DF:
24123 case V4SF_FTYPE_V4SF:
24124 case V4SF_FTYPE_V4SI:
24125 case V4SF_FTYPE_V8SF:
24126 case V4SF_FTYPE_V4DF:
24127 case V4SF_FTYPE_V2DF:
24128 case V2DI_FTYPE_V2DI:
24129 case V2DI_FTYPE_V16QI:
24130 case V2DI_FTYPE_V8HI:
24131 case V2DI_FTYPE_V4SI:
24132 case V2DF_FTYPE_V2DF:
24133 case V2DF_FTYPE_V4SI:
24134 case V2DF_FTYPE_V4DF:
24135 case V2DF_FTYPE_V4SF:
24136 case V2DF_FTYPE_V2SI:
24137 case V2SI_FTYPE_V2SI:
24138 case V2SI_FTYPE_V4SF:
24139 case V2SI_FTYPE_V2SF:
24140 case V2SI_FTYPE_V2DF:
24141 case V2SF_FTYPE_V2SF:
24142 case V2SF_FTYPE_V2SI:
/* Unary merge operations are handled by a dedicated helper.  */
24145 case V4SF_FTYPE_V4SF_VEC_MERGE:
24146 case V2DF_FTYPE_V2DF_VEC_MERGE:
24147 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary builtins.  */
24148 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24149 case V16QI_FTYPE_V16QI_V16QI:
24150 case V16QI_FTYPE_V8HI_V8HI:
24151 case V8QI_FTYPE_V8QI_V8QI:
24152 case V8QI_FTYPE_V4HI_V4HI:
24153 case V8HI_FTYPE_V8HI_V8HI:
24154 case V8HI_FTYPE_V16QI_V16QI:
24155 case V8HI_FTYPE_V4SI_V4SI:
24156 case V8SF_FTYPE_V8SF_V8SF:
24157 case V8SF_FTYPE_V8SF_V8SI:
24158 case V4SI_FTYPE_V4SI_V4SI:
24159 case V4SI_FTYPE_V8HI_V8HI:
24160 case V4SI_FTYPE_V4SF_V4SF:
24161 case V4SI_FTYPE_V2DF_V2DF:
24162 case V4HI_FTYPE_V4HI_V4HI:
24163 case V4HI_FTYPE_V8QI_V8QI:
24164 case V4HI_FTYPE_V2SI_V2SI:
24165 case V4DF_FTYPE_V4DF_V4DF:
24166 case V4DF_FTYPE_V4DF_V4DI:
24167 case V4SF_FTYPE_V4SF_V4SF:
24168 case V4SF_FTYPE_V4SF_V4SI:
24169 case V4SF_FTYPE_V4SF_V2SI:
24170 case V4SF_FTYPE_V4SF_V2DF:
24171 case V4SF_FTYPE_V4SF_DI:
24172 case V4SF_FTYPE_V4SF_SI:
24173 case V2DI_FTYPE_V2DI_V2DI:
24174 case V2DI_FTYPE_V16QI_V16QI:
24175 case V2DI_FTYPE_V4SI_V4SI:
24176 case V2DI_FTYPE_V2DI_V16QI:
24177 case V2DI_FTYPE_V2DF_V2DF:
24178 case V2SI_FTYPE_V2SI_V2SI:
24179 case V2SI_FTYPE_V4HI_V4HI:
24180 case V2SI_FTYPE_V2SF_V2SF:
24181 case V2DF_FTYPE_V2DF_V2DF:
24182 case V2DF_FTYPE_V2DF_V4SF:
24183 case V2DF_FTYPE_V2DF_V2DI:
24184 case V2DF_FTYPE_V2DF_DI:
24185 case V2DF_FTYPE_V2DF_SI:
24186 case V2SF_FTYPE_V2SF_V2SF:
24187 case V1DI_FTYPE_V1DI_V1DI:
24188 case V1DI_FTYPE_V8QI_V8QI:
24189 case V1DI_FTYPE_V2SI_V2SI:
/* Plain binops go to the binop helper; those with a comparison code
   fall through and are handled as SSE compares below.  */
24190 if (comparison == UNKNOWN)
24191 return ix86_expand_binop_builtin (icode, exp, target);
24194 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24195 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24196 gcc_assert (comparison != UNKNOWN);
/* Shift builtins: the last argument is the count.  */
24200 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24201 case V8HI_FTYPE_V8HI_SI_COUNT:
24202 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24203 case V4SI_FTYPE_V4SI_SI_COUNT:
24204 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24205 case V4HI_FTYPE_V4HI_SI_COUNT:
24206 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24207 case V2DI_FTYPE_V2DI_SI_COUNT:
24208 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24209 case V2SI_FTYPE_V2SI_SI_COUNT:
24210 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24211 case V1DI_FTYPE_V1DI_SI_COUNT:
24213 last_arg_count = true;
24215 case UINT64_FTYPE_UINT64_UINT64:
24216 case UINT_FTYPE_UINT_UINT:
24217 case UINT_FTYPE_UINT_USHORT:
24218 case UINT_FTYPE_UINT_UCHAR:
/* Types with a trailing INT take an immediate constant.  */
24221 case V2DI2TI_FTYPE_V2DI_INT:
24224 nargs_constant = 1;
24226 case V8HI_FTYPE_V8HI_INT:
24227 case V8SF_FTYPE_V8SF_INT:
24228 case V4SI_FTYPE_V4SI_INT:
24229 case V4SI_FTYPE_V8SI_INT:
24230 case V4HI_FTYPE_V4HI_INT:
24231 case V4DF_FTYPE_V4DF_INT:
24232 case V4SF_FTYPE_V4SF_INT:
24233 case V4SF_FTYPE_V8SF_INT:
24234 case V2DI_FTYPE_V2DI_INT:
24235 case V2DF_FTYPE_V2DF_INT:
24236 case V2DF_FTYPE_V4DF_INT:
24238 nargs_constant = 1;
/* Ternary builtins.  */
24240 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24241 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24242 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24243 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24244 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24247 case V16QI_FTYPE_V16QI_V16QI_INT:
24248 case V8HI_FTYPE_V8HI_V8HI_INT:
24249 case V8SI_FTYPE_V8SI_V8SI_INT:
24250 case V8SI_FTYPE_V8SI_V4SI_INT:
24251 case V8SF_FTYPE_V8SF_V8SF_INT:
24252 case V8SF_FTYPE_V8SF_V4SF_INT:
24253 case V4SI_FTYPE_V4SI_V4SI_INT:
24254 case V4DF_FTYPE_V4DF_V4DF_INT:
24255 case V4DF_FTYPE_V4DF_V2DF_INT:
24256 case V4SF_FTYPE_V4SF_V4SF_INT:
24257 case V2DI_FTYPE_V2DI_V2DI_INT:
24258 case V2DF_FTYPE_V2DF_V2DF_INT:
24260 nargs_constant = 1;
24262 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24265 nargs_constant = 1;
24267 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24270 nargs_constant = 1;
24272 case V2DI_FTYPE_V2DI_UINT_UINT:
24274 nargs_constant = 2;
24276 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24278 nargs_constant = 2;
24281 gcc_unreachable ();
24284 gcc_assert (nargs <= ARRAY_SIZE (args));
/* A pending comparison means this is really an SSE compare builtin.  */
24286 if (comparison != UNKNOWN)
24288 gcc_assert (nargs == 2);
24289 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the declared result mode differs from the insn's output mode
   (rmode), compute into an rmode pseudo and view it as tmode.  */
24292 if (rmode == VOIDmode || rmode == tmode)
24296 || GET_MODE (target) != tmode
24297 || ! (*insn_p->operand[0].predicate) (target, tmode))
24298 target = gen_reg_rtx (tmode);
24299 real_target = target;
24303 target = gen_reg_rtx (rmode);
24304 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Load each call argument and massage it to fit operand i+1.  */
24307 for (i = 0; i < nargs; i++)
24309 tree arg = CALL_EXPR_ARG (exp, i);
24310 rtx op = expand_normal (arg);
24311 enum machine_mode mode = insn_p->operand[i + 1].mode;
24312 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24314 if (last_arg_count && (i + 1) == nargs)
24316 /* SIMD shift insns take either an 8-bit immediate or
24317 register as count. But builtin functions take int as
24318 count. If count doesn't match, we put it in register. */
24321 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24322 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24323 op = copy_to_reg (op);
/* Immediate operands: diagnose an out-of-range constant with an
   insn-specific bit-width message.  */
24326 else if ((nargs - i) <= nargs_constant)
24331 case CODE_FOR_sse4_1_roundpd:
24332 case CODE_FOR_sse4_1_roundps:
24333 case CODE_FOR_sse4_1_roundsd:
24334 case CODE_FOR_sse4_1_roundss:
24335 case CODE_FOR_sse4_1_blendps:
24336 case CODE_FOR_avx_blendpd256:
24337 case CODE_FOR_avx_vpermilv4df:
24338 case CODE_FOR_avx_roundpd256:
24339 case CODE_FOR_avx_roundps256:
24340 error ("the last argument must be a 4-bit immediate");
24343 case CODE_FOR_sse4_1_blendpd:
24344 case CODE_FOR_avx_vpermilv2df:
24345 error ("the last argument must be a 2-bit immediate");
24348 case CODE_FOR_avx_vextractf128v4df:
24349 case CODE_FOR_avx_vextractf128v8sf:
24350 case CODE_FOR_avx_vextractf128v8si:
24351 case CODE_FOR_avx_vinsertf128v4df:
24352 case CODE_FOR_avx_vinsertf128v8sf:
24353 case CODE_FOR_avx_vinsertf128v8si:
24354 error ("the last argument must be a 1-bit immediate");
24357 case CODE_FOR_avx_cmpsdv2df3:
24358 case CODE_FOR_avx_cmpssv4sf3:
24359 case CODE_FOR_avx_cmppdv2df3:
24360 case CODE_FOR_avx_cmppsv4sf3:
24361 case CODE_FOR_avx_cmppdv4df3:
24362 case CODE_FOR_avx_cmppsv8sf3:
24363 error ("the last argument must be a 5-bit immediate");
/* Default: generic 8-bit immediate diagnostics.  */
24367 switch (nargs_constant)
24370 if ((nargs - i) == nargs_constant)
24372 error ("the next to last argument must be an 8-bit immediate");
24376 error ("the last argument must be an 8-bit immediate");
24379 gcc_unreachable ();
24386 if (VECTOR_MODE_P (mode))
24387 op = safe_vector_operand (op, mode);
24389 /* If we aren't optimizing, only allow one memory operand to
24391 if (memory_operand (op, mode))
24394 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24396 if (optimize || !match || num_memory > 1)
24397 op = copy_to_mode_reg (mode, op);
24401 op = copy_to_reg (op);
24402 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24407 args[i].mode = mode;
/* Emit the pattern with the right arity.  */
24413 pat = GEN_FCN (icode) (real_target, args[0].op);
24416 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24419 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24423 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24424 args[2].op, args[3].op);
24427 gcc_unreachable ();
24437 /* Subroutine of ix86_expand_builtin to take care of insns with
24438 variable number of operands. */
/* Expander for "special" builtins — loads, stores and void insns whose
   pointer argument must become a MEM.  KLASS distinguishes load vs
   store; MEMORY is the index of the memory operand (or ARRAY_SIZE(args)
   when the memory operand is the store target itself).
   NOTE(review): partial excerpt — some case bodies and assignments to
   nargs/klass/memory are not visible here.  */
24441 ix86_expand_special_args_builtin (const struct builtin_description *d,
24442 tree exp, rtx target)
24446 unsigned int i, nargs, arg_adjust, memory;
24450 enum machine_mode mode;
24452 enum insn_code icode = d->icode;
24453 bool last_arg_constant = false;
24454 const struct insn_data *insn_p = &insn_data[icode];
24455 enum machine_mode tmode = insn_p->operand[0].mode;
24456 enum { load, store } klass;
/* Classify the builtin by its abstract function type.  */
24458 switch ((enum ix86_special_builtin_type) d->flag)
24460 case VOID_FTYPE_VOID:
24461 emit_insn (GEN_FCN (icode) (target));
/* Loads through a pointer argument.  */
24463 case V2DI_FTYPE_PV2DI:
24464 case V32QI_FTYPE_PCCHAR:
24465 case V16QI_FTYPE_PCCHAR:
24466 case V8SF_FTYPE_PCV4SF:
24467 case V8SF_FTYPE_PCFLOAT:
24468 case V4SF_FTYPE_PCFLOAT:
24469 case V4DF_FTYPE_PCV2DF:
24470 case V4DF_FTYPE_PCDOUBLE:
24471 case V2DF_FTYPE_PCDOUBLE:
/* Stores through a pointer argument.  */
24476 case VOID_FTYPE_PV2SF_V4SF:
24477 case VOID_FTYPE_PV4DI_V4DI:
24478 case VOID_FTYPE_PV2DI_V2DI:
24479 case VOID_FTYPE_PCHAR_V32QI:
24480 case VOID_FTYPE_PCHAR_V16QI:
24481 case VOID_FTYPE_PFLOAT_V8SF:
24482 case VOID_FTYPE_PFLOAT_V4SF:
24483 case VOID_FTYPE_PDOUBLE_V4DF:
24484 case VOID_FTYPE_PDOUBLE_V2DF:
24485 case VOID_FTYPE_PDI_DI:
24486 case VOID_FTYPE_PINT_INT:
24489 /* Reserve memory operand for target. */
24490 memory = ARRAY_SIZE (args);
24492 case V4SF_FTYPE_V4SF_PCV2SF:
24493 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* AVX masked loads: vector mask plus pointer.  */
24498 case V8SF_FTYPE_PCV8SF_V8SF:
24499 case V4DF_FTYPE_PCV4DF_V4DF:
24500 case V4SF_FTYPE_PCV4SF_V4SF:
24501 case V2DF_FTYPE_PCV2DF_V2DF:
/* AVX masked stores.  */
24506 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24507 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24508 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24509 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24512 /* Reserve memory operand for target. */
24513 memory = ARRAY_SIZE (args);
24516 gcc_unreachable ();
24519 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For a store, the first call argument is the destination pointer; wrap
   it in a MEM which becomes the insn's output operand.  */
24521 if (klass == store)
24523 arg = CALL_EXPR_ARG (exp, 0);
24524 op = expand_normal (arg);
24525 gcc_assert (target == 0);
24526 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24534 || GET_MODE (target) != tmode
24535 || ! (*insn_p->operand[0].predicate) (target, tmode))
24536 target = gen_reg_rtx (tmode);
/* Load and constrain the remaining arguments.  */
24539 for (i = 0; i < nargs; i++)
24541 enum machine_mode mode = insn_p->operand[i + 1].mode;
24544 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24545 op = expand_normal (arg);
24546 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24548 if (last_arg_constant && (i + 1) == nargs)
24554 error ("the last argument must be an 8-bit immediate");
24562 /* This must be the memory operand. */
24563 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24564 gcc_assert (GET_MODE (op) == mode
24565 || GET_MODE (op) == VOIDmode);
24569 /* This must be register. */
24570 if (VECTOR_MODE_P (mode))
24571 op = safe_vector_operand (op, mode);
24573 gcc_assert (GET_MODE (op) == mode
24574 || GET_MODE (op) == VOIDmode);
24575 op = copy_to_mode_reg (mode, op);
24580 args[i].mode = mode;
24586 pat = GEN_FCN (icode) (target, args[0].op);
24589 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24592 gcc_unreachable ();
/* Stores have no value; loads return the destination register.  */
24598 return klass == store ? 0 : target;
24601 /* Return the integer constant in ARG. Constrain it to be in the range
24602 of the subparts of VEC_TYPE; issue an error if not. */
24605 get_element_number (tree vec_type, tree arg)
24607 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a host-representable unsigned integer constant <= MAX.  */
24609 if (!host_integerp (arg, 1)
24610 || (elt = tree_low_cst (arg, 1), elt > max))
24612 error ("selector must be an integer constant in the range 0..%wi", max);
24619 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24620 ix86_expand_vector_init. We DO have language-level syntax for this, in
24621 the form of (type){ init-list }. Except that since we can't place emms
24622 instructions from inside the compiler, we can't allow the use of MMX
24623 registers unless the user explicitly asks for it. So we do *not* define
24624 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24625 we have builtins invoked by mmintrin.h that gives us license to emit
24626 these sorts of instructions. */
24629 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24631 enum machine_mode tmode = TYPE_MODE (type);
24632 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24633 int i, n_elt = GET_MODE_NUNITS (tmode);
24634 rtvec v = rtvec_alloc (n_elt);
/* The builtin takes exactly one argument per vector element.  */
24636 gcc_assert (VECTOR_MODE_P (tmode));
24637 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and collect them into a PARALLEL.  */
24639 for (i = 0; i < n_elt; ++i)
24641 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24642 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24645 if (!target || !register_operand (target, tmode))
24646 target = gen_reg_rtx (tmode);
24648 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24652 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24653 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24654 had a language-level syntax for referencing vector elements. */
24657 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24659 enum machine_mode tmode, mode0;
24664 arg0 = CALL_EXPR_ARG (exp, 0);
24665 arg1 = CALL_EXPR_ARG (exp, 1);
24667 op0 = expand_normal (arg0);
/* ARG1 must be a constant selector within the vector's element count;
   get_element_number diagnoses anything else.  */
24668 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
24670 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24671 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24672 gcc_assert (VECTOR_MODE_P (mode0));
24674 op0 = force_reg (mode0, op0);
24676 if (optimize || !target || !register_operand (target, tmode))
24677 target = gen_reg_rtx (tmode);
24679 ix86_expand_vector_extract (true, target, op0, elt);
24684 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24685 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24686 a language-level syntax for referencing vector elements. */
24689 ix86_expand_vec_set_builtin (tree exp)
24691 enum machine_mode tmode, mode1;
24692 tree arg0, arg1, arg2;
24694 rtx op0, op1, target;
24696 arg0 = CALL_EXPR_ARG (exp, 0);
24697 arg1 = CALL_EXPR_ARG (exp, 1);
24698 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 its element mode.  */
24700 tmode = TYPE_MODE (TREE_TYPE (arg0));
24701 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24702 gcc_assert (VECTOR_MODE_P (tmode));
24704 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24705 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 is the constant element selector.  */
24706 elt = get_element_number (TREE_TYPE (arg0), arg2);
24708 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24709 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24711 op0 = force_reg (tmode, op0);
24712 op1 = force_reg (mode1, op1);
24714 /* OP0 is the source of these builtin functions and shouldn't be
24715 modified. Create a copy, use it and return it as target. */
24716 target = gen_reg_rtx (tmode);
24717 emit_move_insn (target, op0);
24718 ix86_expand_vector_set (true, target, op1, elt);
24723 /* Expand an expression EXP that calls a built-in function,
24724 with result going to TARGET if that's convenient
24725 (and in mode MODE if that's convenient).
24726 SUBTARGET may be used as the target for computing one of EXP's operands.
24727 IGNORE is nonzero if the value is to be ignored. */
/* Top-level builtin dispatcher: handles a few irregular builtins inline,
   then searches the builtin-description tables for the rest.
   NOTE(review): partial excerpt — the switch header, several returns
   and some case bodies are not visible here.  */
24730 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24731 enum machine_mode mode ATTRIBUTE_UNUSED,
24732 int ignore ATTRIBUTE_UNUSED)
24734 const struct builtin_description *d;
24736 enum insn_code icode;
24737 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24738 tree arg0, arg1, arg2;
24739 rtx op0, op1, op2, pat;
24740 enum machine_mode mode0, mode1, mode2;
24741 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24743 /* Determine whether the builtin function is available under the current ISA.
24744 Originally the builtin was not created if it wasn't applicable to the
24745 current ISA based on the command line switches. With function specific
24746 options, we need to check in the context of the function making the call
24747 whether it is supported. */
24748 if (ix86_builtins_isa[fcode].isa
24749 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24751 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24752 NULL, NULL, false);
24755 error ("%qE needs unknown isa option", fndecl);
24758 gcc_assert (opts != NULL);
24759 error ("%qE needs isa option %s", fndecl, opts);
/* Irregular builtins handled inline.  */
24767 case IX86_BUILTIN_MASKMOVQ:
24768 case IX86_BUILTIN_MASKMOVDQU:
24769 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24770 ? CODE_FOR_mmx_maskmovq
24771 : CODE_FOR_sse2_maskmovdqu);
24772 /* Note the arg order is different from the operand order. */
24773 arg1 = CALL_EXPR_ARG (exp, 0);
24774 arg2 = CALL_EXPR_ARG (exp, 1);
24775 arg0 = CALL_EXPR_ARG (exp, 2);
24776 op0 = expand_normal (arg0);
24777 op1 = expand_normal (arg1);
24778 op2 = expand_normal (arg2);
24779 mode0 = insn_data[icode].operand[0].mode;
24780 mode1 = insn_data[icode].operand[1].mode;
24781 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes a MEM in the insn's output mode.  */
24783 op0 = force_reg (Pmode, op0);
24784 op0 = gen_rtx_MEM (mode1, op0);
24786 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24787 op0 = copy_to_mode_reg (mode0, op0);
24788 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24789 op1 = copy_to_mode_reg (mode1, op1);
24790 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24791 op2 = copy_to_mode_reg (mode2, op2);
24792 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR loads the MXCSR register from a stack temporary.  */
24798 case IX86_BUILTIN_LDMXCSR:
24799 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24800 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24801 emit_move_insn (target, op0);
24802 emit_insn (gen_sse_ldmxcsr (target));
/* STMXCSR stores MXCSR into a stack temporary and returns its value.  */
24805 case IX86_BUILTIN_STMXCSR:
24806 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24807 emit_insn (gen_sse_stmxcsr (target));
24808 return copy_to_mode_reg (SImode, target);
24810 case IX86_BUILTIN_CLFLUSH:
24811 arg0 = CALL_EXPR_ARG (exp, 0);
24812 op0 = expand_normal (arg0);
24813 icode = CODE_FOR_sse2_clflush;
24814 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24815 op0 = copy_to_mode_reg (Pmode, op0);
24817 emit_insn (gen_sse2_clflush (op0));
24820 case IX86_BUILTIN_MONITOR:
24821 arg0 = CALL_EXPR_ARG (exp, 0);
24822 arg1 = CALL_EXPR_ARG (exp, 1);
24823 arg2 = CALL_EXPR_ARG (exp, 2);
24824 op0 = expand_normal (arg0);
24825 op1 = expand_normal (arg1);
24826 op2 = expand_normal (arg2);
24828 op0 = copy_to_mode_reg (Pmode, op0);
24830 op1 = copy_to_mode_reg (SImode, op1);
24832 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor abstracts over 32-/64-bit pointer width.  */
24833 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24836 case IX86_BUILTIN_MWAIT:
24837 arg0 = CALL_EXPR_ARG (exp, 0);
24838 arg1 = CALL_EXPR_ARG (exp, 1);
24839 op0 = expand_normal (arg0);
24840 op1 = expand_normal (arg1);
24842 op0 = copy_to_mode_reg (SImode, op0);
24844 op1 = copy_to_mode_reg (SImode, op1);
24845 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set wrappers for MMX and SSE element access.  */
24848 case IX86_BUILTIN_VEC_INIT_V2SI:
24849 case IX86_BUILTIN_VEC_INIT_V4HI:
24850 case IX86_BUILTIN_VEC_INIT_V8QI:
24851 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24853 case IX86_BUILTIN_VEC_EXT_V2DF:
24854 case IX86_BUILTIN_VEC_EXT_V2DI:
24855 case IX86_BUILTIN_VEC_EXT_V4SF:
24856 case IX86_BUILTIN_VEC_EXT_V4SI:
24857 case IX86_BUILTIN_VEC_EXT_V8HI:
24858 case IX86_BUILTIN_VEC_EXT_V2SI:
24859 case IX86_BUILTIN_VEC_EXT_V4HI:
24860 case IX86_BUILTIN_VEC_EXT_V16QI:
24861 return ix86_expand_vec_ext_builtin (exp, target);
24863 case IX86_BUILTIN_VEC_SET_V2DI:
24864 case IX86_BUILTIN_VEC_SET_V4SF:
24865 case IX86_BUILTIN_VEC_SET_V4SI:
24866 case IX86_BUILTIN_VEC_SET_V8HI:
24867 case IX86_BUILTIN_VEC_SET_V4HI:
24868 case IX86_BUILTIN_VEC_SET_V16QI:
24869 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity constants are materialized from memory.  */
24871 case IX86_BUILTIN_INFQ:
24872 case IX86_BUILTIN_HUGE_VALQ:
24874 REAL_VALUE_TYPE inf;
24878 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24880 tmp = validize_mem (force_const_mem (mode, tmp));
24883 target = gen_reg_rtx (mode);
24885 emit_move_insn (target, tmp);
/* Everything else: search the description tables in order.  */
24893 for (i = 0, d = bdesc_special_args;
24894 i < ARRAY_SIZE (bdesc_special_args);
24896 if (d->code == fcode)
24897 return ix86_expand_special_args_builtin (d, exp, target);
24899 for (i = 0, d = bdesc_args;
24900 i < ARRAY_SIZE (bdesc_args);
24902 if (d->code == fcode)
24905 case IX86_BUILTIN_FABSQ:
24906 case IX86_BUILTIN_COPYSIGNQ:
24908 /* Emit a normal call if SSE2 isn't available. */
24909 return expand_call (exp, target, ignore);
24911 return ix86_expand_args_builtin (d, exp, target);
24914 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24915 if (d->code == fcode)
24916 return ix86_expand_sse_comi (d, exp, target);
24918 for (i = 0, d = bdesc_pcmpestr;
24919 i < ARRAY_SIZE (bdesc_pcmpestr);
24921 if (d->code == fcode)
24922 return ix86_expand_sse_pcmpestr (d, exp, target);
24924 for (i = 0, d = bdesc_pcmpistr;
24925 i < ARRAY_SIZE (bdesc_pcmpistr);
24927 if (d->code == fcode)
24928 return ix86_expand_sse_pcmpistr (d, exp, target);
24930 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24931 if (d->code == fcode)
24932 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24933 (enum multi_arg_type)d->flag,
/* An unrecognized fcode is a front-end/back-end inconsistency.  */
24936 gcc_unreachable ();
24939 /* Returns a function decl for a vectorized version of the builtin function
24940 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24941 if it is not available. */
24944 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24947 enum machine_mode in_mode, out_mode;
/* Both types must be vectors; otherwise no vectorized form exists.  */
24950 if (TREE_CODE (type_out) != VECTOR_TYPE
24951 || TREE_CODE (type_in) != VECTOR_TYPE)
24954 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24955 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24956 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24957 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map a scalar math builtin to its SSE vector counterpart when the
   element modes and lane counts line up.  */
24961 case BUILT_IN_SQRT:
24962 if (out_mode == DFmode && out_n == 2
24963 && in_mode == DFmode && in_n == 2)
24964 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24967 case BUILT_IN_SQRTF:
24968 if (out_mode == SFmode && out_n == 4
24969 && in_mode == SFmode && in_n == 4)
24970 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24973 case BUILT_IN_LRINT:
24974 if (out_mode == SImode && out_n == 4
24975 && in_mode == DFmode && in_n == 2)
24976 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24979 case BUILT_IN_LRINTF:
24980 if (out_mode == SImode && out_n == 4
24981 && in_mode == SFmode && in_n == 4)
24982 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24989 /* Dispatch to a handler for a vectorization library. */
24990 if (ix86_veclib_handler)
24991 return (*ix86_veclib_handler)(fn, type_out, type_in);
24996 /* Handler for an SVML-style interface to
24997 a library with vectorized intrinsics. */
/* Map scalar math builtin FN to the corresponding Intel SVML vector
   routine (2-lane DFmode or 4-lane SFmode) and return a fresh extern
   FUNCTION_DECL for it, or NULL_TREE when FN has no SVML counterpart.
   NOTE(review): lines are missing from this listing (the `name' buffer
   declaration, the switch header, early returns, and the uppercase
   conversion loop body, among others).  */
25000 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25003 tree fntype, new_fndecl, args;
25006 enum machine_mode el_mode, in_mode;
25009 /* The SVML is suitable for unsafe math only. */
25010 if (!flag_unsafe_math_optimizations)
25013 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25014 n = TYPE_VECTOR_SUBPARTS (type_out);
25015 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25016 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must agree.  */
25017 if (el_mode != in_mode
/* Double-precision group: only V2DF is handled.  */
25025 case BUILT_IN_LOG10:
25027 case BUILT_IN_TANH:
25029 case BUILT_IN_ATAN:
25030 case BUILT_IN_ATAN2:
25031 case BUILT_IN_ATANH:
25032 case BUILT_IN_CBRT:
25033 case BUILT_IN_SINH:
25035 case BUILT_IN_ASINH:
25036 case BUILT_IN_ASIN:
25037 case BUILT_IN_COSH:
25039 case BUILT_IN_ACOSH:
25040 case BUILT_IN_ACOS:
25041 if (el_mode != DFmode || n != 2)
/* Single-precision group: only V4SF is handled.  */
25045 case BUILT_IN_EXPF:
25046 case BUILT_IN_LOGF:
25047 case BUILT_IN_LOG10F:
25048 case BUILT_IN_POWF:
25049 case BUILT_IN_TANHF:
25050 case BUILT_IN_TANF:
25051 case BUILT_IN_ATANF:
25052 case BUILT_IN_ATAN2F:
25053 case BUILT_IN_ATANHF:
25054 case BUILT_IN_CBRTF:
25055 case BUILT_IN_SINHF:
25056 case BUILT_IN_SINF:
25057 case BUILT_IN_ASINHF:
25058 case BUILT_IN_ASINF:
25059 case BUILT_IN_COSHF:
25060 case BUILT_IN_COSF:
25061 case BUILT_IN_ACOSHF:
25062 case BUILT_IN_ACOSF:
25063 if (el_mode != SFmode || n != 4)
/* Derive the SVML name from the builtin's own name, which always starts
   with "__builtin_" (hence the +10 offset below).  log/logf are special
   because SVML spells them "Ln".  */
25071 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25073 if (fn == BUILT_IN_LOGF)
25074 strcpy (name, "vmlsLn4")_;
25075 else if (fn == BUILT_IN_LOG)
25076 strcpy (name, "vmldLn2");
/* "vmls<name>" with trailing lane count 4 (float)...  */
25079 sprintf (name, "vmls%s", bname+10);
25080 name[strlen (name)-1] = '4';
/* ...or "vmld<name>2" (double).  */
25083 sprintf (name, "vmld%s2", bname+10);
25085 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
25089 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25090 args = TREE_CHAIN (args))
25094 fntype = build_function_type_list (type_out, type_in, NULL);
25096 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25098 /* Build a function declaration for the vectorized function. */
25099 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25100 TREE_PUBLIC (new_fndecl) = 1;
25101 DECL_EXTERNAL (new_fndecl) = 1;
/* SVML routines are pure math: no virtual operands, no memory writes.  */
25102 DECL_IS_NOVOPS (new_fndecl) = 1;
25103 TREE_READONLY (new_fndecl) = 1;
25108 /* Handler for an ACML-style interface to
25109 a library with vectorized intrinsics. */
/* Map scalar math builtin FN to the corresponding AMD ACML vector routine
   ("__vrd2_*" / "__vrs4_*") and return a fresh extern FUNCTION_DECL for
   it, or NULL_TREE when FN is not covered.  Counterpart of
   ix86_veclibabi_svml above.
   NOTE(review): this listing has gaps (64-bit check, switch header, the
   name-patching lines that fill in the ".." of "__vr.._", early
   returns).  */
25112 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template: dots are later replaced with "d2"/"s4", and the builtin's
   own name (minus the "__builtin_" prefix) is appended at offset 7.  */
25114 char name[20] = "__vr.._";
25115 tree fntype, new_fndecl, args;
25118 enum machine_mode el_mode, in_mode;
25121 /* The ACML is 64bits only and suitable for unsafe math only as
25122 it does not correctly support parts of IEEE with the required
25123 precision such as denormals. */
25125 || !flag_unsafe_math_optimizations)
25128 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25129 n = TYPE_VECTOR_SUBPARTS (type_out);
25130 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25131 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must agree.  */
25132 if (el_mode != in_mode
/* Double-precision group (V2DF only).  */
25142 case BUILT_IN_LOG2:
25143 case BUILT_IN_LOG10:
25146 if (el_mode != DFmode
/* Single-precision group (V4SF only).  */
25151 case BUILT_IN_SINF:
25152 case BUILT_IN_COSF:
25153 case BUILT_IN_EXPF:
25154 case BUILT_IN_POWF:
25155 case BUILT_IN_LOGF:
25156 case BUILT_IN_LOG2F:
25157 case BUILT_IN_LOG10F:
25160 if (el_mode != SFmode
/* bname+10 skips the "__builtin_" prefix.  The result lands after the
   7-char "__vrXN_" prefix inside the 20-byte `name' buffer -- safe for
   the short math names accepted above (longest is "log10f").  */
25169 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25170 sprintf (name + 7, "%s", bname+10);
/* Pick unary vs. binary function type from the scalar builtin's
   argument count.  */
25173 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25174 args = TREE_CHAIN (args))
25178 fntype = build_function_type_list (type_out, type_in, NULL);
25180 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25182 /* Build a function declaration for the vectorized function. */
25183 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25184 TREE_PUBLIC (new_fndecl) = 1;
25185 DECL_EXTERNAL (new_fndecl) = 1;
/* ACML routines are pure math: no virtual operands, no memory writes.  */
25186 DECL_IS_NOVOPS (new_fndecl) = 1;
25187 TREE_READONLY (new_fndecl) = 1;
25193 /* Returns a decl of a function that implements conversion of an integer vector
25194 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25195 side of the conversion.
25196 Return NULL_TREE if it is not available. */
/* Implements TARGET_VECTORIZE_BUILTIN_CONVERSION: return the decl of the
   ix86 builtin that converts between an integer vector and a float vector
   (CODE selects the direction, TYPE is the integer side), or NULL_TREE.
   NOTE(review): listing gaps hide the outer switch header, the mode case
   labels and the default returns.  */
25199 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25201 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float: cvtdq2ps.  */
25207 switch (TYPE_MODE (type))
25210 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int with truncation: cvttps2dq.  */
25215 case FIX_TRUNC_EXPR:
25216 switch (TYPE_MODE (type))
25219 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25229 /* Returns a code for a target-specific builtin that implements
25230 reciprocal of the function, or NULL_TREE if not available. */
/* Implements TARGET_BUILTIN_RECIPROCAL: return the decl of a builtin that
   computes the reciprocal (or reciprocal square root) approximation of FN,
   or NULL_TREE.  MD_FN distinguishes machine-dependent builtin codes from
   normal BUILT_IN_* codes.  Only enabled when -mrecip plus the full set of
   unsafe/finite/non-trapping math flags are in effect and we are
   optimizing for speed.  */
25233 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25234 bool sqrt ATTRIBUTE_UNUSED)
25236 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25237 && flag_finite_math_only && !flag_trapping_math
25238 && flag_unsafe_math_optimizations))
25242 /* Machine dependent builtins. */
25245 /* Vectorized version of sqrt to rsqrt conversion. */
25246 case IX86_BUILTIN_SQRTPS_NR:
25247 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25253 /* Normal builtins. */
25256 /* Sqrt to rsqrt conversion. */
25257 case BUILT_IN_SQRTF:
25258 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25265 /* Store OPERAND to the memory after reload is completed. This means
25266 that we can't easily use assign_stack_local. */
/* Spill OPERAND (of MODE) to memory after reload has completed and return
   a MEM rtx addressing it.  Uses the red zone below the stack pointer when
   available; otherwise pushes onto the stack with PRE_DEC (64-bit as one
   DImode push, 32-bit DImode as two SImode pushes, QI/HI widened to SI).
   Pair with ix86_free_from_memory below to release the slot.
   NOTE(review): listing gaps hide the local declarations, several emitted
   push insns' source operands, and the final return.  */
25268 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* assign_stack_local is unavailable this late, hence this helper.  */
25272 gcc_assert (reload_completed);
/* Red zone available (SysV 64-bit): store below the stack pointer
   without adjusting it.  */
25273 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25275 result = gen_rtx_MEM (mode,
25276 gen_rtx_PLUS (Pmode,
25278 GEN_INT (-RED_ZONE_SIZE)));
25279 emit_move_insn (result, operand);
/* 64-bit without red zone: push as a single DImode value.  */
25281 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25287 operand = gen_lowpart (DImode, operand);
25291 gen_rtx_SET (VOIDmode,
25292 gen_rtx_MEM (DImode,
25293 gen_rtx_PRE_DEC (DImode,
25294 stack_pointer_rtx)),
25298 gcc_unreachable ();
25300 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes (high word first, so the
   value ends up little-endian contiguous at the new stack top).  */
25309 split_di (&operand, 1, operands, operands + 1);
25311 gen_rtx_SET (VOIDmode,
25312 gen_rtx_MEM (SImode,
25313 gen_rtx_PRE_DEC (Pmode,
25314 stack_pointer_rtx)),
25317 gen_rtx_SET (VOIDmode,
25318 gen_rtx_MEM (SImode,
25319 gen_rtx_PRE_DEC (Pmode,
25320 stack_pointer_rtx)),
25325 /* Store HImodes as SImodes. */
25326 operand = gen_lowpart (SImode, operand);
25330 gen_rtx_SET (VOIDmode,
25331 gen_rtx_MEM (GET_MODE (operand),
25332 gen_rtx_PRE_DEC (SImode,
25333 stack_pointer_rtx)),
25337 gcc_unreachable ();
25339 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25344 /* Free operand from the memory. */
/* Release the stack slot created by ix86_force_to_memory for MODE.
   A no-op when the red zone was used (nothing was pushed); otherwise
   re-adjusts the stack pointer by the pushed size.
   NOTE(review): the `size' computation lines are missing from this
   listing.  */
25346 ix86_free_from_memory (enum machine_mode mode)
25348 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25352 if (mode == DImode || TARGET_64BIT)
25356 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25357 to pop or add instruction if registers are available. */
25358 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25359 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25364 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25365 QImode must go into class Q_REGS.
25366 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25367 movdf to do mem-to-mem moves through integer regs. */
/* Implementation of PREFERRED_RELOAD_CLASS: narrow REGCLASS to the subset
   best suited for loading X, returning NO_REGS to force X into memory.
   See the block comment above for the overall policy (FP constants to the
   pool, QImode to Q_REGS, ALL_REGS narrowed to GENERAL_REGS).
   NOTE(review): listing gaps hide several return statements (including
   the early NO_REGS/regclass returns) and the function's closing
   fallthrough.  */
25369 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25371 enum machine_mode mode = GET_MODE (x);
25373 /* We're only allowed to return a subclass of CLASS. Many of the
25374 following checks fail for NO_REGS, so eliminate that early. */
25375 if (regclass == NO_REGS)
25378 /* All classes can load zeros. */
25379 if (x == CONST0_RTX (mode))
25382 /* Force constants into memory if we are loading a (nonzero) constant into
25383 an MMX or SSE register. This is because there are no MMX/SSE instructions
25384 to load from a constant. */
25386 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25389 /* Prefer SSE regs only, if we can use them for math. */
25390 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25391 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25393 /* Floating-point constants need more complex checks. */
25394 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25396 /* General regs can load everything. */
25397 if (reg_class_subset_p (regclass, GENERAL_REGS))
25400 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25401 zero above. We only want to wind up preferring 80387 registers if
25402 we plan on doing computation with them. */
25404 && standard_80387_constant_p (x))
25406 /* Limit class to non-sse. */
25407 if (regclass == FLOAT_SSE_REGS)
25409 if (regclass == FP_TOP_SSE_REGS)
25411 if (regclass == FP_SECOND_SSE_REGS)
25412 return FP_SECOND_REG;
25413 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25420 /* Generally when we see PLUS here, it's the function invariant
25421 (plus soft-fp const_int). Which can only be computed into general
25423 if (GET_CODE (x) == PLUS)
25424 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25426 /* QImode constants are easy to load, but non-constant QImode data
25427 must go into Q_REGS. */
25428 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25430 if (reg_class_subset_p (regclass, Q_REGS))
25432 if (reg_class_subset_p (Q_REGS, regclass))
25440 /* Discourage putting floating-point values in SSE registers unless
25441 SSE math is being used, and likewise for the 387 registers. */
/* Implementation of PREFERRED_OUTPUT_RELOAD_CLASS: steer FP output
   reloads to the register bank the function does math on (SSE vs. x87),
   rejecting the alternative with NO_REGS otherwise.
   NOTE(review): `mode' is assigned at its declaration and again at line
   25450 -- the second assignment is redundant but harmless.  */
25443 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25445 enum machine_mode mode = GET_MODE (x);
25447 /* Restrict the output reload class to the register bank that we are doing
25448 math on. If we would like not to return a subset of CLASS, reject this
25449 alternative: if reload cannot do this, it will still use its choice. */
25450 mode = GET_MODE (x);
25451 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25452 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25454 if (X87_FLOAT_MODE_P (mode))
/* Strip the SSE half from mixed x87/SSE classes.  */
25456 if (regclass == FP_TOP_SSE_REGS)
25458 else if (regclass == FP_SECOND_SSE_REGS)
25459 return FP_SECOND_REG;
25461 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implements TARGET_SECONDARY_RELOAD: return the class of an intermediate
   register needed to move X into/out of RCLASS in MODE, or NO_REGS.
   NOTE(review): the tail of this function (the Q_REGS/NO_REGS returns and
   the memory check) is missing from this listing.  */
25467 static enum reg_class
25468 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25469 enum machine_mode mode,
25470 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25472 /* QImode spills from non-QI registers require
25473 intermediate register on 32bit targets. */
25474 if (!in_p && mode == QImode && !TARGET_64BIT
25475 && (rclass == GENERAL_REGS
25476 || rclass == LEGACY_REGS
25477 || rclass == INDEX_REGS))
/* Resolve pseudos and SUBREGs to a hard register number so the
   QI-capable check below can look at the physical register.  */
25486 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25487 regno = true_regnum (x);
25489 /* Return Q_REGS if the operand is in memory. */
25497 /* If we are copying between general and FP registers, we need a memory
25498 location. The same is true for SSE and MMX registers.
25500 To optimize register_move_cost performance, allow inline variant.
25502 The macro can't work reliably when one of the CLASSES is class containing
25503 registers from multiple units (SSE, MMX, integer). We avoid this by never
25504 combining those units in single alternative in the machine description.
25505 Ensure that this constraint holds to avoid unexpected surprises.
25507 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25508 enforce these sanity checks. */
/* Core of SECONDARY_MEMORY_NEEDED (see block comment above): return
   nonzero when a move between CLASS1 and CLASS2 in MODE must go through
   memory.  Kept inline because register_move_cost calls it when building
   its startup tables.  */
25511 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25512 enum machine_mode mode, int strict)
/* Mixed-unit classes (e.g. FLOAT_SSE_REGS) must never reach here from
   real reload; only the cost-estimation path (strict == 0) may.  */
25514 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25515 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25516 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25517 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25518 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25519 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25521 gcc_assert (!strict);
/* x87 <-> anything else always needs memory.  */
25525 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25528 /* ??? This is a lie. We do have moves between mmx/general, and for
25529 mmx/sse2. But by saying we need secondary memory we discourage the
25530 register allocator from using the mmx registers unless needed. */
25531 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25534 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25536 /* SSE1 doesn't have any direct moves from other classes. */
25540 /* If the target says that inter-unit moves are more expensive
25541 than moving through memory, then don't generate them. */
25542 if (!TARGET_INTER_UNIT_MOVES)
25545 /* Between SSE and general, we have moves no larger than word size. */
25546 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; this is the
   entry point referenced by the SECONDARY_MEMORY_NEEDED macro.  */
25554 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25555 enum machine_mode mode, int strict)
25557 return inline_secondary_memory_needed (class1, class2, mode, strict);
25560 /* Return true if the registers in CLASS cannot represent the change from
25561 modes FROM to TO. */
/* Implementation of CANNOT_CHANGE_MODE_CLASS: return true if registers in
   REGCLASS cannot change from mode FROM to mode TO via subreg.
   NOTE(review): the return statements and the final default return are
   missing from this listing.  */
25564 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25565 enum reg_class regclass)
25570 /* x87 registers can't do subreg at all, as all values are reformatted
25571 to extended precision. */
25572 if (MAYBE_FLOAT_CLASS_P (regclass))
25575 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25577 /* Vector registers do not support QI or HImode loads. If we don't
25578 disallow a change to these modes, reload will assume it's ok to
25579 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25580 the vec_dupv4hi pattern. */
25581 if (GET_MODE_SIZE (from) < 4)
25584 /* Vector registers do not support subreg with nonzero offsets, which
25585 are otherwise valid for integer registers. Since we can't see
25586 whether we have a nonzero offset from here, prohibit all
25587 nonparadoxical subregs changing size. */
25588 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25595 /* Return the cost of moving data of mode M between a
25596 register and memory. A value of 2 is the default; this cost is
25597 relative to those in `REGISTER_MOVE_COST'.
25599 This function is used extensively by register_move_cost that is used to
25600 build tables at startup. Make it inline in this case.
25601 When IN is 2, return maximum of in and out move cost.
25603 If moving between registers and memory is more expensive than
25604 between two registers, you should define this macro to express the
25607 Model also increased moving costs of QImode registers in non
/* Core of MEMORY_MOVE_COST (see block comment above): cost of moving MODE
   between memory and a register of REGCLASS.  IN selects load (nonzero),
   store (zero), or the max of both (2).  Kept inline for the table-building
   fast path in register_move_cost.
   NOTE(review): the listing omits the `in' parameter line, the `index'
   computations, most switch case labels and the `if (in == 2)' guards
   that pair each MAX/plain return.  */
25611 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: use the fp_load/fp_store tables.  */
25615 if (FLOAT_CLASS_P (regclass))
25633 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25634 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: indexed by mode size.  */
25636 if (SSE_CLASS_P (regclass))
25639 switch (GET_MODE_SIZE (mode))
25654 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25655 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: indexed by mode size.  */
25657 if (MMX_CLASS_P (regclass))
25660 switch (GET_MODE_SIZE (mode))
25672 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25673 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by mode size.  Byte moves from non-Q registers
   are modeled as the more expensive movzbl on targets where partial
   register dependencies matter.  */
25675 switch (GET_MODE_SIZE (mode))
25678 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25681 return ix86_cost->int_store[0];
25682 if (TARGET_PARTIAL_REG_DEPENDENCY
25683 && optimize_function_for_speed_p (cfun))
25684 cost = ix86_cost->movzbl_load;
25686 cost = ix86_cost->int_load[0];
25688 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes: penalize byte stores by 4 (needs a shuffle through
   a Q register).  */
25694 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25696 return ix86_cost->movzbl_load;
25698 return ix86_cost->int_store[0] + 4;
25703 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25704 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25706 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25707 if (mode == TFmode)
25710 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25712 cost = ix86_cost->int_load[2];
25714 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
25715 return (cost * (((int) GET_MODE_SIZE (mode)
25716 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for inline_memory_move_cost; entry point for the
   MEMORY_MOVE_COST macro.  */
25721 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25723 return inline_memory_move_cost (mode, regclass, in);
25727 /* Return the cost of moving data from a register in class CLASS1 to
25728 one in class CLASS2.
25730 It is not required that the cost always equal 2 when FROM is the same as TO;
25731 on some machines it is expensive to move between registers if they are not
25732 general registers. */
/* Implementation of REGISTER_MOVE_COST: cost of moving MODE from a
   register of CLASS1 to one of CLASS2 (see block comment above).
   NOTE(review): the listing omits the `cost' declaration/initialization
   and several return statements.  */
25735 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25736 enum reg_class class2)
25738 /* In case we require secondary memory, compute cost of the store followed
25739 by load. In order to avoid bad register allocation choices, we need
25740 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25742 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* in == 2 asks for MAX (load, store) on each side.  */
25746 cost += inline_memory_move_cost (mode, class1, 2);
25747 cost += inline_memory_move_cost (mode, class2, 2);
25749 /* In case of copying from general_purpose_register we may emit multiple
25750 stores followed by single load causing memory size mismatch stall.
25751 Count this as arbitrarily high cost of 20. */
25752 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25755 /* In the case of FP/MMX moves, the registers actually overlap, and we
25756 have to switch modes in order to treat them differently. */
25757 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25758 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25764 /* Moves between SSE/MMX and integer unit are expensive. */
25765 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25766 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25768 /* ??? By keeping returned value relatively high, we limit the number
25769 of moves between integer and MMX/SSE registers for all targets.
25770 Additionally, high value prevents problem with x86_modes_tieable_p(),
25771 where integer modes in MMX/SSE registers are not tieable
25772 because of missing QImode and HImode moves to, from or between
25773 MMX/SSE registers. */
25774 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-unit moves: per-unit cost tables.  */
25776 if (MAYBE_FLOAT_CLASS_P (class1))
25777 return ix86_cost->fp_move;
25778 if (MAYBE_SSE_CLASS_P (class1))
25779 return ix86_cost->sse_move;
25780 if (MAYBE_MMX_CLASS_P (class1))
25781 return ix86_cost->mmx_move;
25785 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implementation of HARD_REGNO_MODE_OK: return nonzero if hard register
   REGNO may hold a value of MODE (see comment above).
   NOTE(review): several return statements and the final `return 0' are
   missing from this listing.  */
25788 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25790 /* Flags and only flags can only hold CCmode values. */
25791 if (CC_REGNO_P (regno))
25792 return GET_MODE_CLASS (mode) == MODE_CC
/* CC/RANDOM/PARTIAL_INT modes are rejected for every other register.  */
25793 if (GET_MODE_CLASS (mode) == MODE_CC
25794 || GET_MODE_CLASS (mode) == MODE_RANDOM
25795 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25797 if (FP_REGNO_P (regno))
25798 return VALID_FP_MODE_P (mode);
25799 if (SSE_REGNO_P (regno))
25801 /* We implement the move patterns for all vector modes into and
25802 out of SSE registers, even when no operation instructions
25803 are available. OImode move is available only when AVX is
25805 return ((TARGET_AVX && mode == OImode)
25806 || VALID_AVX256_REG_MODE (mode)
25807 || VALID_SSE_REG_MODE (mode)
25808 || VALID_SSE2_REG_MODE (mode)
25809 || VALID_MMX_REG_MODE (mode)
25810 || VALID_MMX_REG_MODE_3DNOW (mode));
25812 if (MMX_REGNO_P (regno))
25814 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25815 so if the register is available at all, then we can move data of
25816 the given mode into or out of it. */
25817 return (VALID_MMX_REG_MODE (mode)
25818 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on: general purpose registers.  */
25821 if (mode == QImode)
25823 /* Take care for QImode values - they can be in non-QI regs,
25824 but then they do cause partial register stalls. */
25825 if (regno <= BX_REG || TARGET_64BIT)
25827 if (!TARGET_PARTIAL_REG_STALL)
/* During/after reload we cannot be picky any more.  */
25829 return reload_in_progress || reload_completed;
25831 /* We handle both integer and floats in the general purpose registers. */
25832 else if (VALID_INT_MODE_P (mode))
25834 else if (VALID_FP_MODE_P (mode))
25836 else if (VALID_DFP_MODE_P (mode))
25838 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25839 on to use that value in smaller contexts, this can easily force a
25840 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25841 supporting DImode, allow it. */
25842 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25848 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25849 tieable integer mode. */
/* Helper for ix86_modes_tieable_p: true when MODE is an integer mode that
   can be tied with other integer modes (see comment above).
   NOTE(review): the switch over MODE and its remaining cases are missing
   from this listing; only two of the case bodies are visible.  */
25852 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* HI/QI tie freely in 64-bit mode or when partial-register stalls are
   not a concern.  */
25861 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* DImode ties only on 64-bit targets.  */
25864 return TARGET_64BIT;
25871 /* Return true if MODE1 is accessible in a register that can hold MODE2
25872 without copying. That is, all register classes that can hold MODE2
25873 can also hold MODE1. */
/* Implementation of MODES_TIEABLE_P (see comment above): true when every
   register class that can hold MODE2 can also hold MODE1.
   NOTE(review): the `return true/false' lines paired with the visible
   conditions are missing from this listing.  */
25876 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25878 if (mode1 == mode2)
25881 if (ix86_tieable_integer_mode_p (mode1)
25882 && ix86_tieable_integer_mode_p (mode2))
25885 /* MODE2 being XFmode implies fp stack or general regs, which means we
25886 can tie any smaller floating point modes to it. Note that we do not
25887 tie this with TFmode. */
25888 if (mode2 == XFmode)
25889 return mode1 == SFmode || mode1 == DFmode;
25891 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25892 that we can tie it with SFmode. */
25893 if (mode2 == DFmode)
25894 return mode1 == SFmode;
25896 /* If MODE2 is only appropriate for an SSE register, then tie with
25897 any other mode acceptable to SSE registers. */
25898 if (GET_MODE_SIZE (mode2) == 16
25899 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25900 return (GET_MODE_SIZE (mode1) == 16
25901 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25903 /* If MODE2 is appropriate for an MMX register, then tie
25904 with any other mode acceptable to MMX registers. */
25905 if (GET_MODE_SIZE (mode2) == 8
25906 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25907 return (GET_MODE_SIZE (mode1) == 8
25908 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25913 /* Compute a (partial) cost for rtx X. Return true if the complete
25914 cost has been computed, and false if subexpressions should be
25915 scanned. In either case, *TOTAL contains the cost result. */
/* Implementation of TARGET_RTX_COSTS (see comment above): estimate the
   cost of rtx X into *TOTAL, using the speed or size cost tables as
   selected by SPEED.  Returns true when the cost is final, false when
   subexpressions should still be scanned.
   NOTE(review): this listing is heavily gapped -- the big switch header,
   most `case' labels (CONST_INT, ZERO_EXTEND, MULT, PLUS, ...), `break's
   and `return's are not visible; the surviving lines are annotated by
   their apparent case group.  */
25918 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25920 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25921 enum machine_mode mode = GET_MODE (x);
/* Size optimization uses the byte-count table, speed the cycle table.  */
25922 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant / symbolic operand costs.  */
25930 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25932 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25934 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF' applies `!' to the rtx code,
   so this operand of the || is effectively always true (0 != LABEL_REF).
   The intended test looks like `GET_CODE (x) != LABEL_REF'; this typo was
   corrected in later upstream GCC -- confirm against trunk before
   touching, as the adjoining lines are missing here.  */
25936 || (!GET_CODE (x) != LABEL_REF
25937 && (GET_CODE (x) != SYMBOL_REF
25938 || !SYMBOL_REF_LOCAL_P (x)))))
/* CONST_DOUBLE: x87 constants that fldz/fld1 etc. can materialize are
   cheap; everything else is a constant-pool load.  */
25945 if (mode == VOIDmode)
25948 switch (standard_80387_constant_p (x))
25953 default: /* Other constants */
25958 /* Start with (MEM (SYMBOL_REF)), since that's where
25959 it'll probably end up. Add a penalty for size. */
25960 *total = (COSTS_N_INSNS (1)
25961 + (flag_pic != 0 && !TARGET_64BIT)
25962 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2))
/* ZERO_EXTEND.  */
25968 /* The zero extensions is often completely free on x86_64, so make
25969 it as cheap as possible. */
25970 if (TARGET_64BIT && mode == DImode
25971 && GET_MODE (XEXP (x, 0)) == SImode)
25973 else if (TARGET_ZERO_EXTEND_WITH_AND)
25974 *total = cost->add;
25976 *total = cost->movzx;
/* SIGN_EXTEND.  */
25980 *total = cost->movsx;
/* ASHIFT by constant: small left shifts may be done with lea.  */
25984 if (CONST_INT_P (XEXP (x, 1))
25985 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25987 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25990 *total = cost->add;
25993 if ((value == 2 || value == 3)
25994 && cost->lea <= cost->shift_const)
25996 *total = cost->lea;
/* Other shifts/rotates; 32-bit DImode shifts need an insn pair.  */
26006 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26008 if (CONST_INT_P (XEXP (x, 1)))
26010 if (INTVAL (XEXP (x, 1)) > 32)
26011 *total = cost->shift_const + COSTS_N_INSNS (2);
26013 *total = cost->shift_const * 2;
26017 if (GET_CODE (XEXP (x, 1)) == AND)
26018 *total = cost->shift_var * 2;
26020 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26025 if (CONST_INT_P (XEXP (x, 1)))
26026 *total = cost->shift_const;
26028 *total = cost->shift_var;
/* MULT: FP multiply by unit.  */
26033 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26035 /* ??? SSE scalar cost should be used here. */
26036 *total = cost->fmul;
26039 else if (X87_FLOAT_MODE_P (mode))
26041 *total = cost->fmul;
26044 else if (FLOAT_MODE_P (mode))
26046 /* ??? SSE vector cost should be used here. */
26047 *total = cost->fmul;
/* Integer multiply: cost grows with the number of set bits in a
   constant multiplier (nbits), and widening multiplies are costed in
   the narrower inner mode.  */
26052 rtx op0 = XEXP (x, 0);
26053 rtx op1 = XEXP (x, 1);
26055 if (CONST_INT_P (XEXP (x, 1)))
26057 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Kernighan population count: one iteration per set bit.  */
26058 for (nbits = 0; value != 0; value &= value - 1)
26062 /* This is arbitrary. */
26065 /* Compute costs correctly for widening multiplication. */
26066 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26067 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26068 == GET_MODE_SIZE (mode))
26070 int is_mulwiden = 0;
26071 enum machine_mode inner_mode = GET_MODE (op0);
26073 if (GET_CODE (op0) == GET_CODE (op1))
26074 is_mulwiden = 1, op1 = XEXP (op1, 0);
26075 else if (CONST_INT_P (op1))
/* A constant counts as widening when it fits the inner mode with
   the matching signedness.  */
26077 if (GET_CODE (op0) == SIGN_EXTEND)
26078 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26081 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26085 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26088 *total = (cost->mult_init[MODE_INDEX (mode)]
26089 + nbits * cost->mult_bit
26090 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/MOD.  */
26099 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26100 /* ??? SSE cost should be used here. */
26101 *total = cost->fdiv;
26102 else if (X87_FLOAT_MODE_P (mode))
26103 *total = cost->fdiv;
26104 else if (FLOAT_MODE_P (mode))
26105 /* ??? SSE vector cost should be used here. */
26106 *total = cost->fdiv;
26108 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-shaped address arithmetic
   (reg + reg*{2,4,8} + const) and cost it as a single lea.  */
26112 if (GET_MODE_CLASS (mode) == MODE_INT
26113 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26115 if (GET_CODE (XEXP (x, 0)) == PLUS
26116 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26117 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26118 && CONSTANT_P (XEXP (x, 1)))
26120 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26121 if (val == 2 || val == 4 || val == 8)
26123 *total = cost->lea;
26124 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26125 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26126 outer_code, speed);
26127 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26131 else if (GET_CODE (XEXP (x, 0)) == MULT
26132 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26134 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26135 if (val == 2 || val == 4 || val == 8)
26137 *total = cost->lea;
26138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26139 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26143 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26145 *total = cost->lea;
26146 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26147 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26148 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* FP add/sub.  */
26155 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26157 /* ??? SSE cost should be used here. */
26158 *total = cost->fadd;
26161 else if (X87_FLOAT_MODE_P (mode))
26163 *total = cost->fadd;
26166 else if (FLOAT_MODE_P (mode))
26168 /* ??? SSE vector cost should be used here. */
26169 *total = cost->fadd;
/* Logical ops: a 32-bit DImode op is two insns; the shift accounts for
   operands that are narrower than DImode (cost them once, not twice).  */
26177 if (!TARGET_64BIT && mode == DImode)
26179 *total = (cost->add * 2
26180 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26181 << (GET_MODE (XEXP (x, 0)) != DImode))
26182 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26183 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
26189 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26191 /* ??? SSE cost should be used here. */
26192 *total = cost->fchs;
26195 else if (X87_FLOAT_MODE_P (mode))
26197 *total = cost->fchs;
26200 else if (FLOAT_MODE_P (mode))
26202 /* ??? SSE vector cost should be used here. */
26203 *total = cost->fchs;
/* NOT.  */
26209 if (!TARGET_64BIT && mode == DImode)
26210 *total = cost->add * 2;
26212 *total = cost->add;
/* COMPARE of a single extracted bit against zero -> test insn.  */
26216 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26217 && XEXP (XEXP (x, 0), 1) == const1_rtx
26218 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26219 && XEXP (x, 1) == const0_rtx)
26221 /* This kind of construct is implemented using test[bwl].
26222 Treat it as if we had an AND. */
26223 *total = (cost->add
26224 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26225 + rtx_cost (const1_rtx, outer_code, speed));
26231 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
26236 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26237 /* ??? SSE cost should be used here. */
26238 *total = cost->fabs;
26239 else if (X87_FLOAT_MODE_P (mode))
26240 *total = cost->fabs;
26241 else if (FLOAT_MODE_P (mode))
26242 /* ??? SSE vector cost should be used here. */
26243 *total = cost->fabs;
/* SQRT.  */
26247 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26248 /* ??? SSE cost should be used here. */
26249 *total = cost->fsqrt;
26250 else if (X87_FLOAT_MODE_P (mode))
26251 *total = cost->fsqrt;
26252 else if (FLOAT_MODE_P (mode))
26253 /* ??? SSE vector cost should be used here. */
26254 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reads are effectively free.  */
26258 if (XINT (x, 1) == UNSPEC_TP)
/* Monotonic counter used to generate unique local labels (LPC$n, Ln$lz)
   in the Mach-O lazy-binding stubs emitted below.  */
26269 static int current_machopic_label_num;
26271 /* Given a symbol name and its associated stub, write out the
26272 definition of the stub. */
/* Emits a Darwin/Mach-O 32-bit lazy symbol stub into FILE: the stub
   itself (PIC or non-PIC flavor), the binder trampoline that jumps to
   dyld_stub_binding_helper, and the lazy pointer slot.
   NOTE(review): listing gaps hide the MACHOPIC_ATOMIC/PURE tests that
   select between the PIC and non-PIC emission paths.  */
26275 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26277 unsigned int length;
26278 char *binder_name, *symbol_name, lazy_ptr_name[32];
26279 int label = ++current_machopic_label_num;
26281 /* For 64-bit we shouldn't get here. */
26282 gcc_assert (!TARGET_64BIT);
26284 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26285 symb = (*targetm.strip_name_encoding) (symb);
26287 length = strlen (stub);
26288 binder_name = XALLOCAVEC (char, length + 32);
26289 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26291 length = strlen (symb);
26292 symbol_name = XALLOCAVEC (char, length + 32);
26293 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26295 sprintf (lazy_ptr_name, "L%d$lz", label);
26298 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26300 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
/* Stub body: load the lazy pointer and jump through it.  The PIC form
   materializes the PC in %eax via call/pop.  */
26302 fprintf (file, "%s:\n", stub);
26303 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26307 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26308 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26309 fprintf (file, "\tjmp\t*%%edx\n");
26312 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer address and enter dyld's resolver.  */
26314 fprintf (file, "%s:\n", binder_name);
26318 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26319 fprintf (file, "\tpushl\t%%eax\n");
26322 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26324 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld rewrites it to the
   resolved symbol on first use.  */
26326 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26327 fprintf (file, "%s:\n", lazy_ptr_name);
26328 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26329 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegate to the generic Darwin
   end-of-file handling.
   NOTE(review): any x86-specific lines between the header and this call
   are missing from this listing.  */
26333 darwin_x86_file_end (void)
26335 darwin_file_end ();
26338 #endif /* TARGET_MACHO */
26340 /* Order the registers for register allocator. */
/* Fill reg_alloc_order[] with the register allocation preference order:
   call-clobbered GPRs, call-saved GPRs, then the FP math bank in use
   first (x87 before SSE when not doing SSE math, after it otherwise),
   and MMX last.  Remaining slots are zero-filled.  */
26343 x86_order_regs_for_local_alloc (void)
26348 /* First allocate the local general purpose registers. */
26349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26350 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26351 reg_alloc_order [pos++] = i;
26353 /* Global general purpose registers. */
26354 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26355 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26356 reg_alloc_order [pos++] = i;
26358 /* x87 registers come first in case we are doing FP math
26360 if (!TARGET_SSE_MATH)
26361 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26362 reg_alloc_order [pos++] = i;
26364 /* SSE registers. */
26365 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26366 reg_alloc_order [pos++] = i;
26367 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26368 reg_alloc_order [pos++] = i;
26370 /* x87 registers. */
26371 if (TARGET_SSE_MATH)
26372 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26373 reg_alloc_order [pos++] = i;
26375 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26376 reg_alloc_order [pos++] = i;
26378 /* Initialize the rest of array as we do not allocate some registers
26380 while (pos < FIRST_PSEUDO_REGISTER)
26381 reg_alloc_order [pos++] = 0;
26384 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26385    struct attribute_spec.handler.  */
26387 ix86_handle_abi_attribute (tree *node, tree name,
26388 			   tree args ATTRIBUTE_UNUSED,
26389 			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
   /* Reject the attribute on anything that is not a function type or
      declaration, and diagnose mutually exclusive ms_abi/sysv_abi
      combinations.  *no_add_attrs is set when the attribute must be
      dropped.  */
26391   if (TREE_CODE (*node) != FUNCTION_TYPE
26392       && TREE_CODE (*node) != METHOD_TYPE
26393       && TREE_CODE (*node) != FIELD_DECL
26394       && TREE_CODE (*node) != TYPE_DECL)
26396       warning (OPT_Wattributes, "%qs attribute only applies to functions",
26397 	       IDENTIFIER_POINTER (name));
26398       *no_add_attrs = true;
   /* NOTE(review): the guard before this warning (presumably a
      !TARGET_64BIT check) is not visible here -- confirm against the
      full source.  */
26403       warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26404 	       IDENTIFIER_POINTER (name));
26405       *no_add_attrs = true;
26409   /* Can combine regparm with all attributes but fastcall.  */
26410   if (is_attribute_p ("ms_abi", name))
26412       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26414 	  error ("ms_abi and sysv_abi attributes are not compatible");
26419   else if (is_attribute_p ("sysv_abi", name))
26421       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26423 	  error ("ms_abi and sysv_abi attributes are not compatible");
26432 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26433    struct attribute_spec.handler.  */
26435 ix86_handle_struct_attribute (tree *node, tree name,
26436 			      tree args ATTRIBUTE_UNUSED,
26437 			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
   /* Locate the type the attribute applies to: for a TYPE_DECL use its
      TREE_TYPE; accept only RECORD_TYPE/UNION_TYPE targets, and refuse
      combining ms_struct with gcc_struct (and vice versa).  */
26440   if (DECL_P (*node))
26442       if (TREE_CODE (*node) == TYPE_DECL)
26443 	type = &TREE_TYPE (*node);
26448   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26449 		 || TREE_CODE (*type) == UNION_TYPE)))
26451       warning (OPT_Wattributes, "%qs attribute ignored",
26452 	       IDENTIFIER_POINTER (name));
26453       *no_add_attrs = true;
26456   else if ((is_attribute_p ("ms_struct", name)
26457 	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26458 	   || ((is_attribute_p ("gcc_struct", name)
26459 		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26461       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26462                IDENTIFIER_POINTER (name));
26463       *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: true when RECORD_TYPE should use
   the MS bitfield layout -- either the target default is MS layout and
   the type is not marked gcc_struct, or the type is explicitly marked
   ms_struct.  */
26470 ix86_ms_bitfield_layout_p (const_tree record_type)
26472   return (TARGET_MS_BITFIELD_LAYOUT &&
26473 	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26474     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26477 /* Returns an expression indicating where the this parameter is
26478    located on entry to the FUNCTION.  */
26481 x86_this_parameter (tree function)
26483   tree type = TREE_TYPE (function);
   /* AGGR is nonzero when the function returns an aggregate via a
      hidden pointer, which shifts `this' to the next parameter slot.  */
26484   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26489       const int *parm_regs;
   /* 64-bit path: `this' is in the first (or second, if AGGR) integer
      argument register of the appropriate ABI.  */
26491       if (ix86_function_type_abi (type) == MS_ABI)
26492         parm_regs = x86_64_ms_abi_int_parameter_registers;
26494         parm_regs = x86_64_int_parameter_registers;
26495       return gen_rtx_REG (DImode, parm_regs[aggr]);
26498   nregs = ix86_function_regparm (type, function);
26500   if (nregs > 0 && !stdarg_p (type))
   /* fastcall passes `this' in %ecx (%edx when AGGR).  */
26504       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26505 	regno = aggr ? DX_REG : CX_REG;
   /* NOTE(review): intermediate regparm cases are elided here; this
      fallthrough loads `this' from the stack slot at 4(%esp).  */
26513 	    return gen_rtx_MEM (SImode,
26514 				plus_constant (stack_pointer_rtx, 4));
26517       return gen_rtx_REG (SImode, regno);
   /* Default 32-bit case: `this' is on the stack, after the hidden
      aggregate-return pointer when AGGR.  */
26520   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26523 /* Determine whether x86_output_mi_thunk can succeed.  */
26526 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26527 			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26528 			 HOST_WIDE_INT vcall_offset, const_tree function)
26530   /* 64-bit can handle anything.  */
26534   /* For 32-bit, everything's fine if we have one free register.  */
26535   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26538   /* Need a free register for vcall_offset.  */
26542   /* Need a free register for GOT references.  */
26543   if (flag_pic && !(*targetm.binds_local_p) (function))
26546   /* Otherwise ok.  */
26550 /* Output the assembler code for a thunk function.  THUNK_DECL is the
26551    declaration for the thunk function itself, FUNCTION is the decl for
26552    the target function.  DELTA is an immediate constant offset to be
26553    added to THIS.  If VCALL_OFFSET is nonzero, the word at
26554    *(*this + vcall_offset) should be added to THIS.  */
26557 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26558 		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26559 		     HOST_WIDE_INT vcall_offset, tree function)
26562   rtx this_param = x86_this_parameter (function);
26565   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
26566      pull it in now and let DELTA benefit.  */
26567   if (REG_P (this_param))
26568     this_reg = this_param;
26569   else if (vcall_offset)
26571       /* Put the this parameter into %eax.  */
26572       xops[0] = this_param;
26573       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26574       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26577     this_reg = NULL_RTX;
26579   /* Adjust the this parameter by a fixed constant.  */
26582       xops[0] = GEN_INT (delta);
26583       xops[1] = this_reg ? this_reg : this_param;
   /* On 64-bit, a DELTA that is not a valid immediate must first be
      materialized in %r10 (a register not used for arguments).  */
26586 	  if (!x86_64_general_operand (xops[0], DImode))
26588 	      tmp = gen_rtx_REG (DImode, R10_REG);
26590 	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26592 	      xops[1] = this_param;
26594 	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26597 	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26600   /* Adjust the this parameter by a value stored in the vtable.  */
26604 	tmp = gen_rtx_REG (DImode, R10_REG);
   /* 32-bit: scratch register is %ecx unless fastcall already uses it,
      in which case fall back to %eax.  */
26607 	  int tmp_regno = CX_REG;
26608 	  if (lookup_attribute ("fastcall",
26609 				TYPE_ATTRIBUTES (TREE_TYPE (function))))
26610 	    tmp_regno = AX_REG;
26611 	  tmp = gen_rtx_REG (SImode, tmp_regno);
   /* Load the vtable pointer (*this) into TMP.  */
26614       xops[0] = gen_rtx_MEM (Pmode, this_reg);
26616       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26618       /* Adjust the this parameter.  */
26619       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
   /* Too-large VCALL_OFFSET on 64-bit: build the address in %r11.  */
26620       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26622 	  rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26623 	  xops[0] = GEN_INT (vcall_offset);
26625 	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26626 	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26628       xops[1] = this_reg;
26629       output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26632   /* If necessary, drop THIS back to its stack slot.  */
26633   if (this_reg && this_reg != this_param)
26635       xops[0] = this_reg;
26636       xops[1] = this_param;
26637       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
   /* Finally, tail-jump to the real target function.  */
26640   xops[0] = XEXP (DECL_RTL (function), 0);
26643       if (!flag_pic || (*targetm.binds_local_p) (function))
26644 	output_asm_insn ("jmp\t%P0", xops);
26645       /* All thunks should be in the same object as their target,
26646 	 and thus binds_local_p should be true.  */
26647       else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26648 	gcc_unreachable ();
   /* PIC 64-bit: indirect jump through the GOT entry.  */
26651 	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26652 	  tmp = gen_rtx_CONST (Pmode, tmp);
26653 	  tmp = gen_rtx_MEM (QImode, tmp);
26655 	  output_asm_insn ("jmp\t%A0", xops);
26660       if (!flag_pic || (*targetm.binds_local_p) (function))
26661 	output_asm_insn ("jmp\t%P0", xops);
   /* Darwin PIC: jump through the machopic indirection stub.  */
26666 	  rtx sym_ref = XEXP (DECL_RTL (function), 0);
26667 	  tmp = (gen_rtx_SYMBOL_REF
26669 		  machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26670 	  tmp = gen_rtx_MEM (QImode, tmp);
26672 	  output_asm_insn ("jmp\t%0", xops);
26675 #endif /* TARGET_MACHO */
   /* Generic 32-bit PIC: set up the GOT pointer in %ecx and jump
      through the function's GOT slot.  */
26677 	  tmp = gen_rtx_REG (SImode, CX_REG);
26678 	  output_set_got (tmp, NULL_RTX);
26681 	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26682 	  output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit the standard file prologue plus the
   x86-specific directives (.version, __fltused global, Intel-syntax
   switch) that the target flags request.  */
26688 x86_file_start (void)
26690   default_file_start ();
26692   darwin_file_start ();
26694   if (X86_FILE_START_VERSION_DIRECTIVE)
26695     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26696   if (X86_FILE_START_FLTUSED)
26697     fputs ("\t.global\t__fltused\n", asm_out_file);
26698   if (ix86_asm_dialect == ASM_INTEL)
26699     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: compute the alignment for FIELD given the
   COMPUTED default.  On 32-bit without -malign-double, doubles and
   (complex) integer modes are capped at 32-bit alignment for ABI
   compatibility.  */
26703 x86_field_alignment (tree field, int computed)
26705   enum machine_mode mode;
26706   tree type = TREE_TYPE (field);
   /* 64-bit and -malign-double keep the natural alignment.  */
26708   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26710   mode = TYPE_MODE (strip_array_types (type));
26711   if (mode == DFmode || mode == DCmode
26712       || GET_MODE_CLASS (mode) == MODE_INT
26713       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26714     return MIN (32, computed);
26718 /* Output assembler code to FILE to increment profiler label # LABELNO
26719    for profiling a function entry.  */
26721 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
   /* 64-bit: load the counter address into %r11 and call mcount,
      indirectly through the GOT under SysV PIC.  */
26725 #ifndef NO_PROFILE_COUNTERS
26726       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26729       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26730 	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26732 	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
   /* 32-bit PIC: address the counter and mcount through %ebx/GOT.  */
26736 #ifndef NO_PROFILE_COUNTERS
26737       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26738 	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26740       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
   /* 32-bit non-PIC: absolute counter address, direct call.  */
26744 #ifndef NO_PROFILE_COUNTERS
26745       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26746 	       PROFILE_COUNT_REGISTER);
26748       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26752 /* We don't have exact information about the insn sizes, but we may assume
26753    quite safely that we are informed about all 1 byte insns and memory
26754    address sizes.  This is enough to eliminate unnecessary padding in
26758 min_insn_size (rtx insn)
   /* Returns a conservative lower bound on INSN's encoded length in
      bytes; non-insns and inactive insns contribute nothing.  */
26762   if (!INSN_P (insn) || !active_insn_p (insn))
26765   /* Discard alignments we've emit and jump instructions.  */
26766   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26767       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26770       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26771 	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
26774   /* Important case - calls are always 5 bytes.
26775      It is common to have many calls in the row.  */
26777       && symbolic_reference_mentioned_p (PATTERN (insn))
26778       && !SIBLING_CALL_P (insn))
26780   if (get_attr_length (insn) <= 1)
26783   /* For normal instructions we may rely on the sizes of addresses
26784      and the presence of symbol to require 4 bytes of encoding.
26785      This is not the case for jumps where references are PC relative.  */
26786   if (!JUMP_P (insn))
26788       l = get_attr_length_address (insn);
26789       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26798 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
26802 ix86_avoid_jump_misspredicts (void)
26804   rtx insn, start = get_insns ();
26805   int nbytes = 0, njumps = 0;
26808   /* Look for all minimal intervals of instructions containing 4 jumps.
26809      The intervals are bounded by START and INSN.  NBYTES is the total
26810      size of instructions in the interval including INSN and not including
26811      START.  When the NBYTES is smaller than 16 bytes, it is possible
26812      that the end of START and INSN ends up in the same 16byte page.
26814      The smallest offset in the page INSN can start is the case where START
26815      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
26816      We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26818   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
   /* Accumulate the estimated size of INSN into the sliding window.  */
26821       nbytes += min_insn_size (insn);
26823 	fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26824 		INSN_UID (insn), min_insn_size (insn));
   /* Count real jumps; dispatch tables are excluded.  */
26826 	  && GET_CODE (PATTERN (insn)) != ADDR_VEC
26827 	  && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
   /* Shrink the window from the front until at most 4 jumps remain,
      un-counting any jump that falls out of it.  */
26835 	  start = NEXT_INSN (start);
26836 	  if ((JUMP_P (start)
26837 	       && GET_CODE (PATTERN (start)) != ADDR_VEC
26838 	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26840 	    njumps--, isjump = 1;
26843 	  nbytes -= min_insn_size (start);
26845       gcc_assert (njumps >= 0);
26847 	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26848 		 INSN_UID (start), INSN_UID (insn), nbytes);
   /* Four jumps could share one 16-byte window: pad before INSN so it
      starts in the next window.  */
26850       if (njumps == 3 && isjump && nbytes < 16)
26852 	  int padsize = 15 - nbytes + min_insn_size (insn);
26855 	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26856 		     INSN_UID (insn), padsize);
26857           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26862 /* AMD Athlon works faster
26863    when RET is not destination of conditional jump or directly preceded
26864    by other jump instruction.  We avoid the penalty by inserting NOP just
26865    before the RET instructions in such cases.  */
26867 ix86_pad_returns (void)
   /* Walk every predecessor edge of the exit block, looking at the
      RETURN insn that ends each such block.  */
26872   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26874       basic_block bb = e->src;
26875       rtx ret = BB_END (bb);
26877       bool replace = false;
   /* Skip non-returns and blocks optimized for size.  */
26879       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26880 	  || optimize_bb_for_size_p (bb))
   /* Find the nearest preceding active insn or label.  */
26882       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26883 	if (active_insn_p (prev) || LABEL_P (prev))
   /* A label right before RET: pad if any non-fallthru edge with
      nonzero frequency reaches it (i.e. RET is a jump target).  */
26885       if (prev && LABEL_P (prev))
26890 	  FOR_EACH_EDGE (e, ei, bb->preds)
26891 	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
26892 		&& !(e->flags & EDGE_FALLTHRU))
26897 	  prev = prev_active_insn (ret);
26899 	      && ((JUMP_P (prev) && any_condjump_p (prev))
26902 	  /* Empty functions get branch mispredict even when the jump destination
26903 	     is not visible to us.  */
26904 	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
   /* Replace the plain RET with the padded long-return pattern.  */
26909 	  emit_insn_before (gen_return_internal_long (), ret);
26915 /* Implement machine specific optimizations.  We implement padding of returns
26916    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
   /* Both passes run only when optimizing for speed and the
      corresponding tuning flag is set.  */
26920   if (TARGET_PAD_RETURNS && optimize
26921       && optimize_function_for_speed_p (cfun))
26922     ix86_pad_returns ();
26923   if (TARGET_FOUR_JUMP_LIMIT && optimize
26924       && optimize_function_for_speed_p (cfun))
26925     ix86_avoid_jump_misspredicts ();
26928 /* Return nonzero when QImode register that must be represented via REX prefix
26931 x86_extended_QIreg_mentioned_p (rtx insn)
   /* Scan the cached operands of INSN for a hard register above %ebx,
      i.e. one whose QImode form needs a REX prefix.  */
26934   extract_insn_cached (insn);
26935   for (i = 0; i < recog_data.n_operands; i++)
26936     if (REG_P (recog_data.operand[i])
26937 	&& REGNO (recog_data.operand[i]) > BX_REG)
26942 /* Return nonzero when P points to register encoded via REX prefix.
26943    Called via for_each_rtx.  */
26945 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26947   unsigned int regno;
   /* True for the REX integer (r8-r15) and REX SSE (xmm8-xmm15)
      register numbers.  */
26950   regno = REGNO (*p);
26951   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26954 /* Return true when INSN mentions register that must be encoded using REX
26957 x86_extended_reg_mentioned_p (rtx insn)
   /* Walk the pattern (or the rtx itself for non-insns) with the
      per-register predicate above.  */
26959   return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26960 		       extended_reg_mentioned_1, NULL);
26963 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
26964    optabs would emit if we didn't have TFmode patterns.  */
26967 x86_emit_floatuns (rtx operands[2])
26969   rtx neglab, donelab, i0, i1, f0, in, out;
26970   enum machine_mode mode, inmode;
26972   inmode = GET_MODE (operands[1]);
26973   gcc_assert (inmode == SImode || inmode == DImode);
26976   in = force_reg (inmode, operands[1]);
26977   mode = GET_MODE (out);
26978   neglab = gen_label_rtx ();
26979   donelab = gen_label_rtx ();
26980   f0 = gen_reg_rtx (mode);
   /* Nonnegative inputs convert directly with a signed float.  */
26982   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26984   expand_float (out, in, 0);
26986   emit_jump_insn (gen_jump (donelab));
26989   emit_label (neglab);
   /* Negative (high-bit-set) inputs: halve with the lost low bit ORed
      back in ((in >> 1) | (in & 1)), convert signed, then double --
      preserving correct rounding for the full unsigned range.  */
26991   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26993   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26995   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26997   expand_float (f0, i0, 0);
26999   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27001   emit_label (donelab);
27004 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27005    with all elements equal to VAR.  Return true if successful.  */
27008 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27009 				   rtx target, rtx val)
27011   enum machine_mode hmode, smode, wsmode, wvmode;
   /* Simple case: the target has a native VEC_DUPLICATE pattern.  */
27026       val = force_reg (GET_MODE_INNER (mode), val);
27027       x = gen_rtx_VEC_DUPLICATE (mode, val);
27028       emit_insn (gen_rtx_SET (VOIDmode, target, x));
   /* V4HI via SSE/3DNow!: truncate an SImode copy and duplicate it.  */
27034       if (TARGET_SSE || TARGET_3DNOW_A)
27036 	  val = gen_lowpart (SImode, val);
27037 	  x = gen_rtx_TRUNCATE (HImode, val);
27038 	  x = gen_rtx_VEC_DUPLICATE (mode, x);
27039 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
   /* V8HI broadcast: movd the value into an SSE register, punpcklwd to
      spread the short through the low word, then pshufd to replicate.  */
27061 	  /* Extend HImode to SImode using a paradoxical SUBREG.  */
27062 	  tmp1 = gen_reg_rtx (SImode);
27063 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
27064 	  /* Insert the SImode value as low element of V4SImode vector. */
27065 	  tmp2 = gen_reg_rtx (V4SImode);
27066 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27067 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27068 				    CONST0_RTX (V4SImode),
27070 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27071 	  /* Cast the V4SImode vector back to a V8HImode vector.  */
27072 	  tmp1 = gen_reg_rtx (V8HImode);
27073 	  emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27074 	  /* Duplicate the low short through the whole low SImode word.  */
27075 	  emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27076 	  /* Cast the V8HImode vector back to a V4SImode vector.  */
27077 	  tmp2 = gen_reg_rtx (V4SImode);
27078 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27079 	  /* Replicate the low element of the V4SImode vector.  */
27080 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27081 	  /* Cast the V2SImode back to V8HImode, and store in target.  */
27082 	  emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
   /* V16QI broadcast: same approach, with two punpcklbw steps to spread
      the byte through the low word.  */
27093 	  /* Extend QImode to SImode using a paradoxical SUBREG.  */
27094 	  tmp1 = gen_reg_rtx (SImode);
27095 	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
27096 	  /* Insert the SImode value as low element of V4SImode vector. */
27097 	  tmp2 = gen_reg_rtx (V4SImode);
27098 	  tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27099 				    gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27100 				    CONST0_RTX (V4SImode),
27102 	  emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27103 	  /* Cast the V4SImode vector back to a V16QImode vector.  */
27104 	  tmp1 = gen_reg_rtx (V16QImode);
27105 	  emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27106 	  /* Duplicate the low byte through the whole low SImode word.  */
27107 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27108 	  emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27109 	  /* Cast the V16QImode vector back to a V4SImode vector.  */
27110 	  tmp2 = gen_reg_rtx (V4SImode);
27111 	  emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27112 	  /* Replicate the low element of the V4SImode vector.  */
27113 	  emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27114 	  /* Cast the V2SImode back to V16QImode, and store in target.  */
27115 	  emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
   /* Scalar widening fallback: pack two copies of VAL into the next
      wider scalar mode ((val << bits) | val) and recurse on the
      corresponding wider vector mode.  */
27123 	  /* Replicate the value once into the next wider mode and recurse.  */
27124 	  val = convert_modes (wsmode, smode, val, true);
27125 	  x = expand_simple_binop (wsmode, ASHIFT, val,
27126 				   GEN_INT (GET_MODE_BITSIZE (smode)),
27127 				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
27128 	  val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27130 	  x = gen_reg_rtx (wvmode);
27131 	  if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27132 	    gcc_unreachable ();
27133 	  emit_move_insn (target, gen_lowpart (mode, x));
   /* Wide (e.g. 256-bit) modes: duplicate into a half-width vector and
      concatenate it with itself.  */
27156 	rtx tmp = gen_reg_rtx (hmode);
27157 	ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27158 	emit_insn (gen_rtx_SET (VOIDmode, target,
27159 				gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27168 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27169    whose ONE_VAR element is VAR, and other elements are zero.  Return true
27173 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27174 				     rtx target, rtx var, int one_var)
27176   enum machine_mode vsimode;
27179   bool use_vector_set = false;
   /* Decide per mode whether a zero + vector-set sequence is best.  */
27184       /* For SSE4.1, we normally use vector set.  But if the second
27185 	 element is zero and inter-unit moves are OK, we use movq
27187       use_vector_set = (TARGET_64BIT
27189 			&& !(TARGET_INTER_UNIT_MOVES
27195       use_vector_set = TARGET_SSE4_1;
27198       use_vector_set = TARGET_SSE2;
27201       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27208       use_vector_set = TARGET_AVX;
27211       /* Use ix86_expand_vector_set in 64bit mode only.  */
27212       use_vector_set = TARGET_AVX && TARGET_64BIT;
27218   if (use_vector_set)
   /* Zero the whole vector, then insert VAR at ONE_VAR.  */
27220       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27221       var = force_reg (GET_MODE_INNER (mode), var);
27222       ix86_expand_vector_set (mmx_ok, target, var, one_var);
   /* Two-element case: concatenate VAR with a zero element.  */
27238 	  var = force_reg (GET_MODE_INNER (mode), var);
27239 	  x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27240 	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
   /* Four-element SSE case: merge VAR into a zero vector as element 0,
      then shuffle it into position ONE_VAR.  Use a fresh pseudo when
      TARGET is not a suitable register.  */
27245       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27246 	new_target = gen_reg_rtx (mode);
27248 	new_target = target;
27249       var = force_reg (GET_MODE_INNER (mode), var);
27250       x = gen_rtx_VEC_DUPLICATE (mode, var);
27251       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27252       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27255 	  /* We need to shuffle the value to the correct position, so
27256 	     create a new pseudo to store the intermediate result.  */
27258 	  /* With SSE2, we can use the integer shuffle insns.  */
27259 	  if (mode != V4SFmode && TARGET_SSE2)
27261 	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27263 					    GEN_INT (one_var == 1 ? 0 : 1),
27264 					    GEN_INT (one_var == 2 ? 0 : 1),
27265 					    GEN_INT (one_var == 3 ? 0 : 1)));
27266 	      if (target != new_target)
27267 		emit_move_insn (target, new_target);
27271 	  /* Otherwise convert the intermediate result to V4SFmode and
27272 	     use the SSE1 shuffle instructions.  */
27273 	  if (mode != V4SFmode)
27275 	      tmp = gen_reg_rtx (V4SFmode);
27276 	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27281 	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27283 				       GEN_INT (one_var == 1 ? 0 : 1),
27284 				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
27285 				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27287 	  if (mode != V4SFmode)
27288 	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27289 	  else if (tmp != target)
27290 	    emit_move_insn (target, tmp);
27292       else if (target != new_target)
27293 	emit_move_insn (target, new_target);
27298       vsimode = V4SImode;
27304       vsimode = V2SImode;
   /* Narrow element types: zero-extend VAR to SImode and build the
      corresponding SImode vector instead.  */
27310       /* Zero extend the variable element to SImode and recurse.  */
27311       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27313       x = gen_reg_rtx (vsimode);
27314       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27316 	gcc_unreachable ();
27318       emit_move_insn (target, gen_lowpart (mode, x));
27326 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27327    consisting of the values in VALS.  It is known that all elements
27328    except ONE_VAR are constants.  Return true if successful.  */
27331 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27332 				 rtx target, rtx vals, int one_var)
27334   rtx var = XVECEXP (vals, 0, one_var);
27335   enum machine_mode wmode;
   /* Build the constant part of the vector with element ONE_VAR
      replaced by zero; it is loaded first and then patched.  */
27338   const_vec = copy_rtx (vals);
27339   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27340   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27348       /* For the two element vectors, it's just as easy to use
27349 	 the general case.  */
27353       /* Use ix86_expand_vector_set in 64bit mode only.  */
27376       /* There's no way to set one QImode entry easily.  Combine
27377 	 the variable value with its adjacent constant value, and
27378 	 promote to an HImode set.  */
27379       x = XVECEXP (vals, 0, one_var ^ 1);
   /* Place VAR in the high or low byte of the HImode pair depending on
      whether ONE_VAR is the odd or even index.  */
27382 	  var = convert_modes (HImode, QImode, var, true);
27383 	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27384 				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
27385 	  x = GEN_INT (INTVAL (x) & 0xff);
27389 	  var = convert_modes (HImode, QImode, var, true);
27390 	  x = gen_int_mode (INTVAL (x) << 8, HImode);
27392       if (x != const0_rtx)
27393 	var = expand_simple_binop (HImode, IOR, var, x, var,
27394 				   1, OPTAB_LIB_WIDEN);
   /* Load the constant vector in the wide mode and insert the combined
      HImode value at the pair's position.  */
27396       x = gen_reg_rtx (wmode);
27397       emit_move_insn (x, gen_lowpart (wmode, const_vec));
27398       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27400       emit_move_insn (target, gen_lowpart (mode, x));
   /* Generic path: load the constant vector, then overwrite ONE_VAR.  */
27407   emit_move_insn (target, const_vec);
27408   ix86_expand_vector_set (mmx_ok, target, var, one_var);
27412 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27413    concatenate to handle the most general case: all values variable,
27414    and none identical.  */
27417 ix86_expand_vector_init_concat (enum machine_mode mode,
27418 				rtx target, rtx *ops, int n)
27420   enum machine_mode cmode, hmode = VOIDmode;
27421   rtx first[8], second[4];
27461       gcc_unreachable ();
   /* N == 2: a single VEC_CONCAT of the two (registered) operands.  */
27464       if (!register_operand (ops[1], cmode))
27465 	ops[1] = force_reg (cmode, ops[1]);
27466       if (!register_operand (ops[0], cmode))
27467 	ops[0] = force_reg (cmode, ops[0]);
27468       emit_insn (gen_rtx_SET (VOIDmode, target,
27469 			      gen_rtx_VEC_CONCAT (mode, ops[0],
27489 	  gcc_unreachable ();
27505 	  gcc_unreachable ();
   /* N >= 4: pairwise-combine operands into half-width vectors, then
      recurse until a single concat remains.  */
27510       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
27513       for (; i > 0; i -= 2, j--)
27515 	  first[j] = gen_reg_rtx (cmode);
27516 	  v = gen_rtvec (2, ops[i - 1], ops[i]);
27517 	  ix86_expand_vector_init (false, first[j],
27518 				   gen_rtx_PARALLEL (cmode, v));
   /* When an intermediate mode HMODE exists, combine pairs again
      before the final concat.  */
27524 	  gcc_assert (hmode != VOIDmode);
27525 	  for (i = j = 0; i < n; i += 2, j++)
27527 	      second[j] = gen_reg_rtx (hmode);
27528 	      ix86_expand_vector_init_concat (hmode, second [j],
27532 	  ix86_expand_vector_init_concat (mode, target, second, n);
27535 	ix86_expand_vector_init_concat (mode, target, first, n);
27539       gcc_unreachable ();
27543 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27544    interleave to handle the most general case: all values variable,
27545    and none identical.  */
27548 ix86_expand_vector_init_interleave (enum machine_mode mode,
27549 				    rtx target, rtx *ops, int n)
27551   enum machine_mode first_imode, second_imode, third_imode, inner_mode;
   /* Per-mode generator functions and the ladder of progressively wider
      integer vector modes used while interleaving.  */
27554   rtx (*gen_load_even) (rtx, rtx, rtx);
27555   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27556   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27561       gen_load_even = gen_vec_setv8hi;
27562       gen_interleave_first_low = gen_vec_interleave_lowv4si;
27563       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27564       inner_mode = HImode;
27565       first_imode = V4SImode;
27566       second_imode = V2DImode;
27567       third_imode = VOIDmode;
27570       gen_load_even = gen_vec_setv16qi;
27571       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27572       gen_interleave_second_low = gen_vec_interleave_lowv4si;
27573       inner_mode = QImode;
27574       first_imode = V8HImode;
27575       second_imode = V4SImode;
27576       third_imode = V2DImode;
27579       gcc_unreachable ();
   /* Stage 1: for each output slot, pack an odd/even element pair into
      one vector register.  */
27582   for (i = 0; i < n; i++)
27584       /* Extend the odd elment to SImode using a paradoxical SUBREG.  */
27585       op0 = gen_reg_rtx (SImode);
27586       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27588       /* Insert the SImode value as low element of V4SImode vector. */
27589       op1 = gen_reg_rtx (V4SImode);
27590       op0 = gen_rtx_VEC_MERGE (V4SImode,
27591 			       gen_rtx_VEC_DUPLICATE (V4SImode,
27593 			       CONST0_RTX (V4SImode),
27595       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27597       /* Cast the V4SImode vector back to a vector in orignal mode.  */
27598       op0 = gen_reg_rtx (mode);
27599       emit_move_insn (op0, gen_lowpart (mode, op1));
27601       /* Load even elements into the second positon.  */
27602       emit_insn ((*gen_load_even) (op0,
27603 				   force_reg (inner_mode,
27607       /* Cast vector to FIRST_IMODE vector.  */
27608       ops[i] = gen_reg_rtx (first_imode);
27609       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
   /* Stage 2: merge pairs with the first interleave-low.  */
27612   /* Interleave low FIRST_IMODE vectors.  */
27613   for (i = j = 0; i < n; i += 2, j++)
27615       op0 = gen_reg_rtx (first_imode);
27616       emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27618       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
27619       ops[j] = gen_reg_rtx (second_imode);
27620       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
   /* Stage 3: keep interleaving until a single vector remains.  */
27623   /* Interleave low SECOND_IMODE vectors.  */
27624   switch (second_imode)
27627       for (i = j = 0; i < n / 2; i += 2, j++)
27629 	  op0 = gen_reg_rtx (second_imode);
27630 	  emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27633 	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27635 	  ops[j] = gen_reg_rtx (third_imode);
27636 	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
   /* Fall through to V2DImode for the final interleave.  */
27638       second_imode = V2DImode;
27639       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27643       op0 = gen_reg_rtx (second_imode);
27644       emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27647       /* Cast the SECOND_IMODE vector back to a vector on original
27649       emit_insn (gen_rtx_SET (VOIDmode, target,
27650 			      gen_lowpart (mode, op0)));
27654       gcc_unreachable ();
27658 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27659    all values variable, and none identical.  */
27662 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27663 				 rtx target, rtx vals)
27665   rtx ops[32], op0, op1;
27666   enum machine_mode half_mode = VOIDmode;
27673   if (!mmx_ok && !TARGET_SSE)
   /* Modes handled by concatenating variable elements.  */
27685       n = GET_MODE_NUNITS (mode);
27686       for (i = 0; i < n; i++)
27687 	ops[i] = XVECEXP (vals, 0, i);
27688       ix86_expand_vector_init_concat (mode, target, ops, n);
   /* 256-bit byte/short modes: build two half-width vectors with the
      interleave strategy and concatenate them.  */
27692       half_mode = V16QImode;
27696       half_mode = V8HImode;
27700       n = GET_MODE_NUNITS (mode);
27701       for (i = 0; i < n; i++)
27702 	ops[i] = XVECEXP (vals, 0, i);
27703       op0 = gen_reg_rtx (half_mode);
27704       op1 = gen_reg_rtx (half_mode);
27705       ix86_expand_vector_init_interleave (half_mode, op0, ops,
27707       ix86_expand_vector_init_interleave (half_mode, op1,
27708 					  &ops [n >> 1], n >> 2);
27709       emit_insn (gen_rtx_SET (VOIDmode, target,
27710 			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
27714       if (!TARGET_SSE4_1)
27722       /* Don't use ix86_expand_vector_init_interleave if we can't
27723 	 move from GPR to SSE register directly.  */
27724       if (!TARGET_INTER_UNIT_MOVES)
27727       n = GET_MODE_NUNITS (mode);
27728       for (i = 0; i < n; i++)
27729 	ops[i] = XVECEXP (vals, 0, i);
27730       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27738       gcc_unreachable ();
   /* Scalar fallback: pack elements into word_mode integers with
      shift/or, then assemble the vector from those words.  */
27742     int i, j, n_elts, n_words, n_elt_per_word;
27743     enum machine_mode inner_mode;
27744     rtx words[4], shift;
27746     inner_mode = GET_MODE_INNER (mode);
27747     n_elts = GET_MODE_NUNITS (mode);
27748     n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27749     n_elt_per_word = n_elts / n_words;
27750     shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27752     for (i = 0; i < n_words; ++i)
27754 	rtx word = NULL_RTX;
27756 	for (j = 0; j < n_elt_per_word; ++j)
   /* Elements are combined from the highest index down so the first
      element ends up in the low bits of the word.  */
27758 	    rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27759 	    elt = convert_modes (word_mode, inner_mode, elt, true);
27765 		word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27766 					    word, 1, OPTAB_LIB_WIDEN);
27767 		word = expand_simple_binop (word_mode, IOR, word, elt,
27768 					    word, 1, OPTAB_LIB_WIDEN);
27776       emit_move_insn (target, gen_lowpart (mode, words[0]));
27777     else if (n_words == 2)
   /* Two words: clobber a fresh pseudo, then fill its low and high
      word halves.  */
27779 	rtx tmp = gen_reg_rtx (mode);
27780 	emit_clobber (tmp);
27781 	emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27782 	emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27783 	emit_move_insn (target, tmp);
27785     else if (n_words == 4)
   /* Four SImode words: recurse as a V4SImode build.  */
27787 	rtx tmp = gen_reg_rtx (V4SImode);
27788 	gcc_assert (word_mode == SImode);
27789 	vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27790 	ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27791 	emit_move_insn (target, gen_lowpart (mode, tmp));
27794       gcc_unreachable ();
27798 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
27799    instructions unless MMX_OK is true.  */
27802 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27804   enum machine_mode mode = GET_MODE (target);
27805   enum machine_mode inner_mode = GET_MODE_INNER (mode);
27806   int n_elts = GET_MODE_NUNITS (mode);
27807   int n_var = 0, one_var = -1;
27808   bool all_same = true, all_const_zero = true;
   /* Classify the elements: count variables, remember the single
      variable index, and track all-same / all-zero patterns.  */
27812   for (i = 0; i < n_elts; ++i)
27814       x = XVECEXP (vals, 0, i);
27815       if (!(CONST_INT_P (x)
27816 	    || GET_CODE (x) == CONST_DOUBLE
27817 	    || GET_CODE (x) == CONST_FIXED))
27818 	n_var++, one_var = i;
27819       else if (x != CONST0_RTX (inner_mode))
27820 	all_const_zero = false;
27821       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27825   /* Constants are best loaded from the constant pool.  */
27828       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27832   /* If all values are identical, broadcast the value.  */
27834       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27835 					    XVECEXP (vals, 0, 0)))
27838   /* Values where only one field is non-constant are best loaded from
27839      the pool and overwritten via move later.  */
27843 	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27844 						  XVECEXP (vals, 0, one_var),
27848       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
   /* Everything else falls back to the general expander.  */
27852   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, choosing a
   mode/ISA-specific strategy: VEC_MERGE (SSE4.1 pinsr-style), shufps
   tricks for V4SF, pshufd swaps for V4SI, half-vector extract/insert
   for 256-bit AVX modes, and a stack-memory round-trip as last resort.
   MMX instructions are used only when MMX_OK is true.
   NOTE(review): the switch labels and several statements are elided in
   this listing; comments below describe only what is visible. */
27856 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27858 enum machine_mode mode = GET_MODE (target);
27859 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27860 enum machine_mode half_mode;
27861 bool use_vec_merge = false;
/* Tables of generators for extracting/inserting the low or high half
   of each 256-bit vector mode; indexed [mode-row][hi/lo]. */
27863 static rtx (*gen_extract[6][2]) (rtx, rtx)
27865 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27866 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27867 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27868 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27869 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27870 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27872 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27874 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27875 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27876 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27877 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27878 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27879 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the element we keep, then VEC_CONCAT it
   with VAL in the right order. */
27889 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27890 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27892 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27894 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27895 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27901 use_vec_merge = TARGET_SSE4_1;
27909 /* For the two element vectors, we implement a VEC_CONCAT with
27910 the extraction of the other element. */
27912 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27913 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27916 op0 = val, op1 = tmp;
27918 op0 = tmp, op1 = val;
27920 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27921 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27926 use_vec_merge = TARGET_SSE4_1;
27933 use_vec_merge = true;
/* V4SF without SSE4.1: route VAL into element 0, then use shufps to
   place it; the letter diagrams track element positions. */
27937 /* tmp = target = A B C D */
27938 tmp = copy_to_reg (target);
27939 /* target = A A B B */
27940 emit_insn (gen_sse_unpcklps (target, target, target));
27941 /* target = X A B B */
27942 ix86_expand_vector_set (false, target, val, 0);
27943 /* target = A X C D */
27944 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27945 GEN_INT (1), GEN_INT (0),
27946 GEN_INT (2+4), GEN_INT (3+4)));
27950 /* tmp = target = A B C D */
27951 tmp = copy_to_reg (target);
27952 /* tmp = X B C D */
27953 ix86_expand_vector_set (false, tmp, val, 0);
27954 /* target = A B X D */
27955 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27956 GEN_INT (0), GEN_INT (1),
27957 GEN_INT (0+4), GEN_INT (3+4)));
27961 /* tmp = target = A B C D */
27962 tmp = copy_to_reg (target);
27963 /* tmp = X B C D */
27964 ix86_expand_vector_set (false, tmp, val, 0);
27965 /* target = A B X D */
27966 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27967 GEN_INT (0), GEN_INT (1),
27968 GEN_INT (2+4), GEN_INT (0+4)));
27972 gcc_unreachable ();
27977 use_vec_merge = TARGET_SSE4_1;
27981 /* Element 0 handled by vec_merge below. */
27984 use_vec_merge = true;
27990 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27991 store into element 0, then shuffle them back. */
27995 order[0] = GEN_INT (elt);
27996 order[1] = const1_rtx;
27997 order[2] = const2_rtx;
27998 order[3] = GEN_INT (3);
/* ORDER now describes the permutation that swaps lanes 0 and ELT;
   applying it twice restores the original order. */
27999 order[elt] = const0_rtx;
28001 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28002 order[1], order[2], order[3]));
28004 ix86_expand_vector_set (false, target, val, 0);
28006 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28007 order[1], order[2], order[3]));
28011 /* For SSE1, we have to reuse the V4SF code. */
28012 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28013 gen_lowpart (SFmode, val), elt);
28018 use_vec_merge = TARGET_SSE2;
28021 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28025 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: pick the 128-bit half mode, then work on that
   half via the extract/insert tables above. */
28032 half_mode = V16QImode;
28038 half_mode = V8HImode;
28044 half_mode = V4SImode;
28050 half_mode = V2DImode;
28056 half_mode = V4SFmode;
28062 half_mode = V2DFmode;
28068 /* Compute offset. */
28072 gcc_assert (i <= 1);
28074 /* Extract the half. */
28075 tmp = gen_reg_rtx (half_mode);
28076 emit_insn ((*gen_extract[j][i]) (tmp, target));
28078 /* Put val in tmp at elt. */
28079 ix86_expand_vector_set (false, tmp, val, elt);
28082 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* VEC_MERGE path: duplicate VAL across the vector and merge in only
   lane ELT (mask 1 << elt). */
28091 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28092 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28093 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to a stack slot, overwrite the one
   element in memory, and reload the whole vector. */
28097 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28099 emit_move_insn (mem, target);
28101 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28102 emit_move_insn (tmp, val);
28104 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses
   pextr-style vec_extract when available, shufps/pshufd to move the
   wanted lane into position 0 otherwise, and a stack round-trip as the
   last resort.  MMX is allowed only when MMX_OK is true.
   NOTE(review): switch labels and several statements are elided in
   this listing. */
28109 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28111 enum machine_mode mode = GET_MODE (vec);
28112 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28113 bool use_vec_extr = false;
28126 use_vec_extr = true;
28130 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted lane into position 0 of a fresh register. */
28142 tmp = gen_reg_rtx (mode);
28143 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28144 GEN_INT (elt), GEN_INT (elt),
28145 GEN_INT (elt+4), GEN_INT (elt+4)));
28149 tmp = gen_reg_rtx (mode);
28150 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28154 gcc_unreachable ();
28157 use_vec_extr = true;
28162 use_vec_extr = TARGET_SSE4_1;
/* V4SI: broadcast lane ELT with pshufd, then extract lane 0. */
28176 tmp = gen_reg_rtx (mode);
28177 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28178 GEN_INT (elt), GEN_INT (elt),
28179 GEN_INT (elt), GEN_INT (elt)));
28183 tmp = gen_reg_rtx (mode);
28184 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28188 gcc_unreachable ();
28191 use_vec_extr = true;
28196 /* For SSE1, we have to reuse the V4SF code. */
28197 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28198 gen_lowpart (V4SFmode, vec), elt);
28204 use_vec_extr = TARGET_SSE2;
28207 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28211 use_vec_extr = TARGET_SSE4_1;
28215 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_extract path: express the extraction as a VEC_SELECT of one
   lane. */
28222 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28223 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28225 /* Let the rtl optimizers know about the zero extension performed. */
28226 if (inner_mode == QImode || inner_mode == HImode)
28228 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28229 target = gen_lowpart (SImode, target);
28232 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to the stack and load the one element back. */
28236 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28238 emit_move_insn (mem, vec);
28240 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28241 emit_move_insn (target, tmp);
28245 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28246 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: combine high/low halves with movhlps, then
   combine the remaining pair via a lane-1 shuffle, applying FN at each
   step.  DEST receives FN applied across all four lanes. */
28249 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28251 rtx tmp1, tmp2, tmp3;
28253 tmp1 = gen_reg_rtx (V4SFmode);
28254 tmp2 = gen_reg_rtx (V4SFmode);
28255 tmp3 = gen_reg_rtx (V4SFmode);
/* tmp1 = upper half of IN moved to the lower lanes. */
28257 emit_insn (gen_sse_movhlps (tmp1, in, in));
28258 emit_insn (fn (tmp2, tmp1, in));
/* Broadcast lane 1 of the partial result so FN can combine it with
   lane 0. */
28260 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28261 GEN_INT (1), GEN_INT (1),
28262 GEN_INT (1+4), GEN_INT (1+4)));
28263 emit_insn (fn (dest, tmp2, tmp3));
28266 /* Target hook for scalar_mode_supported_p. */
/* Accept decimal float modes and TFmode specially; everything else is
   deferred to the generic default.
   NOTE(review): the return statements for the first two cases are
   elided in this listing. */
28268 ix86_scalar_mode_supported_p (enum machine_mode mode)
28270 if (DECIMAL_FLOAT_MODE_P (mode))
28272 else if (mode == TFmode)
28275 return default_scalar_mode_supported_p (mode);
28278 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when some enabled ISA level (SSE, SSE2,
   AVX-256, MMX, 3DNow!) validates it; each check pairs a target flag
   with the matching mode predicate. */
28280 ix86_vector_mode_supported_p (enum machine_mode mode)
28282 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28284 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28286 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28288 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28290 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28295 /* Target hook for c_mode_for_suffix. */
/* Maps a constant-suffix character to a machine mode.
   NOTE(review): the entire function body is elided in this listing. */
28296 static enum machine_mode
28297 ix86_c_mode_for_suffix (char suffix)
28307 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28309 We do this in the new i386 backend to maintain source compatibility
28310 with the old cc0-based compiler. */
/* Prepends implicit "flags" and "fpsr" clobbers to every asm
   statement's clobber list. */
28313 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28314 tree inputs ATTRIBUTE_UNUSED,
28317 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28319 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28324 /* Implements target vector targetm.asm.encode_section_info. This
28325 is not used by netware. */
/* After the default encoding, tag static/external variables living in
   the large data section with SYMBOL_FLAG_FAR_ADDR so addressing code
   knows they need far addresses. */
28327 static void ATTRIBUTE_UNUSED
28328 ix86_encode_section_info (tree decl, rtx rtl, int first)
28330 default_encode_section_info (decl, rtl, first);
28332 if (TREE_CODE (decl) == VAR_DECL
28333 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28334 && ix86_in_large_data_p (decl))
28335 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28338 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes (CCFP/CCFPU) must honor unordered operands, so
   they use the maybe-unordered reversal; all other CC modes reverse
   normally. */
28341 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28343 return (mode != CCFPmode && mode != CCFPUmode
28344 ? reverse_condition (code)
28345 : reverse_condition_maybe_unordered (code));
28348 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 move.  A REG_DEAD note on
   the source selects the popping forms (fstp/ffreep) so the x87 stack
   stays balanced. */
28352 output_387_reg_move (rtx insn, rtx *operands)
28354 if (REG_P (operands[0]))
28356 if (REG_P (operands[1])
28357 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28359 if (REGNO (operands[0]) == FIRST_STACK_REG)
28360 return output_387_ffreep (operands, 0);
28361 return "fstp\t%y0";
28363 if (STACK_TOP_P (operands[0]))
28364 return "fld%z1\t%y1";
28367 else if (MEM_P (operands[0]))
28369 gcc_assert (REG_P (operands[1]));
28370 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28371 return "fstp%z0\t%y0";
28374 /* There is no non-popping store to memory for XFmode.
28375 So if we need one, follow the store with a load. */
28376 if (GET_MODE (operands[0]) == XFmode)
28377 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28379 return "fst%z0\t%y0";
28386 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28387 FP status register is set. */
/* Reads the x87 status word with fnstsw; with SAHF it is transferred
   to EFLAGS and tested as UNORDERED, otherwise bit 0x04 (C2) is
   tested directly.  Branch probability is set to 10%. */
28390 ix86_emit_fp_unordered_jump (rtx label)
28392 rtx reg = gen_reg_rtx (HImode);
28395 emit_insn (gen_x86_fnstsw_1 (reg));
28397 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28399 emit_insn (gen_x86_sahf_1 (reg));
28401 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28402 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28406 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28408 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28409 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28412 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28413 gen_rtx_LABEL_REF (VOIDmode, label),
28415 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28417 emit_jump_insn (temp);
28418 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28421 /* Output code to perform a log1p XFmode calculation. */
/* Emits the accuracy-preserving split for log1p(x):
   |x| < 1 - sqrt(0.5)  =>  fyl2xp1 (ln2 * log2(1+x)) directly;
   otherwise              =>  fyl2x on (1 + x).
   The threshold constant below is 1 - sqrt(1/2). */
28423 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28425 rtx label1 = gen_label_rtx ();
28426 rtx label2 = gen_label_rtx ();
28428 rtx tmp = gen_reg_rtx (XFmode);
28429 rtx tmp2 = gen_reg_rtx (XFmode);
28431 emit_insn (gen_absxf2 (tmp, op1));
28432 emit_insn (gen_cmpxf (tmp,
28433 CONST_DOUBLE_FROM_REAL_VALUE (
28434 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28436 emit_jump_insn (gen_bge (label1));
28438 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28439 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28440 emit_jump (label2);
28442 emit_label (label1);
28443 emit_move_insn (tmp, CONST1_RTX (XFmode));
28444 emit_insn (gen_addxf3 (tmp, op1, tmp));
28445 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28446 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28448 emit_label (label2);
28451 /* Output code to perform a Newton-Raphson approximation of a single precision
28452 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware rcpss/rcpps estimate;
   works element-wise when MODE is a vector mode. */
28454 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28456 rtx x0, x1, e0, e1, two;
28458 x0 = gen_reg_rtx (mode);
28459 e0 = gen_reg_rtx (mode);
28460 e1 = gen_reg_rtx (mode);
28461 x1 = gen_reg_rtx (mode);
28463 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28465 if (VECTOR_MODE_P (mode))
28466 two = ix86_build_const_vector (SFmode, true, two);
28468 two = force_reg (mode, two);
28470 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28472 /* x0 = rcp(b) estimate */
28473 emit_insn (gen_rtx_SET (VOIDmode, x0,
28474 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28477 emit_insn (gen_rtx_SET (VOIDmode, e0,
28478 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28480 emit_insn (gen_rtx_SET (VOIDmode, e1,
28481 gen_rtx_MINUS (mode, two, e0)));
/* x1 = refined reciprocal = x0 * e1 */
28483 emit_insn (gen_rtx_SET (VOIDmode, x1,
28484 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 ~= a / b */
28486 emit_insn (gen_rtx_SET (VOIDmode, res,
28487 gen_rtx_MULT (mode, a, x1)));
28490 /* Output code to perform a Newton-Raphson approximation of a
28491 single precision floating point [reciprocal] square root. */
/* Refines the rsqrtss/rsqrtps estimate with one Newton-Raphson step.
   A boolean (elided in this listing) selects between computing
   sqrt(a) and rsqrt(a); the a==0 masking protects sqrt(0.0) from the
   infinite rsqrt estimate. */
28493 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28496 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28499 x0 = gen_reg_rtx (mode);
28500 e0 = gen_reg_rtx (mode);
28501 e1 = gen_reg_rtx (mode);
28502 e2 = gen_reg_rtx (mode);
28503 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula. */
28505 real_from_integer (&r, VOIDmode, -3, -1, 0);
28506 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28508 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28509 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28511 if (VECTOR_MODE_P (mode))
28513 mthree = ix86_build_const_vector (SFmode, true, mthree);
28514 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28517 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28518 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28520 /* x0 = rsqrt(a) estimate */
28521 emit_insn (gen_rtx_SET (VOIDmode, x0,
28522 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28525 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28530 zero = gen_reg_rtx (mode);
28531 mask = gen_reg_rtx (mode);
28533 zero = force_reg (mode, CONST0_RTX(mode));
/* mask = (a != 0); AND-ing it into x0 zeroes lanes where a == 0. */
28534 emit_insn (gen_rtx_SET (VOIDmode, mask,
28535 gen_rtx_NE (mode, zero, a)));
28537 emit_insn (gen_rtx_SET (VOIDmode, x0,
28538 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a, e1 = e0 * x0 = a * x0^2 */
28542 emit_insn (gen_rtx_SET (VOIDmode, e0,
28543 gen_rtx_MULT (mode, x0, a)));
28545 emit_insn (gen_rtx_SET (VOIDmode, e1,
28546 gen_rtx_MULT (mode, e0, x0)));
/* e2 = a * x0^2 - 3.0 */
28549 mthree = force_reg (mode, mthree);
28550 emit_insn (gen_rtx_SET (VOIDmode, e2,
28551 gen_rtx_PLUS (mode, e1, mthree)));
28553 mhalf = force_reg (mode, mhalf);
28555 /* e3 = -.5 * x0 */
28556 emit_insn (gen_rtx_SET (VOIDmode, e3,
28557 gen_rtx_MULT (mode, x0, mhalf)));
28559 /* e3 = -.5 * e0 */
28560 emit_insn (gen_rtx_SET (VOIDmode, e3,
28561 gen_rtx_MULT (mode, e0, mhalf)));
28562 /* ret = e2 * e3 */
28563 emit_insn (gen_rtx_SET (VOIDmode, res,
28564 gen_rtx_MULT (mode, e2, e3)));
28567 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emits the section directive itself only for ".eh_frame" (to append
   the @unwind marker the Solaris assembler needs); everything else is
   delegated to the generic ELF handler. */
28569 static void ATTRIBUTE_UNUSED
28570 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28573 /* With Binutils 2.15, the "@unwind" marker must be specified on
28574 every occurrence of the ".eh_frame" section, not just the first
28577 && strcmp (name, ".eh_frame") == 0)
28579 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28580 flags & SECTION_WRITE ? "aw" : "a");
28583 default_elf_asm_named_section (name, flags, decl);
28586 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Only void/boolean/integer/real types can be extended fundamental
   types; others get no special mangling.  Per the comments below,
   __float128 mangles as "g" and long double/__float80 as "e". */
28588 static const char *
28589 ix86_mangle_type (const_tree type)
28591 type = TYPE_MAIN_VARIANT (type);
28593 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28594 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28597 switch (TYPE_MODE (type))
28600 /* __float128 is "g". */
28603 /* "long double" or __float80 is "e". */
28610 /* For 32-bit code we can save PIC register setup by using
28611 __stack_chk_fail_local hidden function instead of calling
28612 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28613 register, so it is better to call __stack_chk_fail directly. */
/* Returns the call tree for the stack-protector failure path, picking
   the external or hidden-local variant by target bitness. */
28616 ix86_stack_protect_fail (void)
28618 return TARGET_64BIT
28619 ? default_external_stack_protect_fail ()
28620 : default_hidden_stack_protect_fail ();
28623 /* Select a format to encode pointers in exception handling data. CODE
28624 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28625 true if the symbol may be affected by dynamic relocations.
28627 ??? All x86 object file formats are capable of representing this.
28628 After all, the relocation needed is the same as for the call insn.
28629 Whether or not a particular assembler allows us to enter such, I
28630 guess we'll have to see. */
/* PIC path: pc-relative sdata (4- or 8-byte by code model), indirect
   when the symbol is global.  Non-PIC: small/medium code models can
   use udata4, otherwise a plain absolute pointer. */
28632 asm_preferred_eh_data_format (int code, int global)
28636 int type = DW_EH_PE_sdata8;
28638 || ix86_cmodel == CM_SMALL_PIC
28639 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28640 type = DW_EH_PE_sdata4;
28641 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28643 if (ix86_cmodel == CM_SMALL
28644 || (ix86_cmodel == CM_MEDIUM && code))
28645 return DW_EH_PE_udata4;
28646 return DW_EH_PE_absptr;
28649 /* Expand copysign from SIGN to the positive value ABS_VALUE
28650 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & sign-bit-mask).  When MASK is NULL a
   fresh sign-bit mask is built (reduced to scalar via VEC_SELECT for
   scalar modes); a caller-provided MASK is inverted before use. */
28653 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28655 enum machine_mode mode = GET_MODE (sign);
28656 rtx sgn = gen_reg_rtx (mode);
28657 if (mask == NULL_RTX)
28659 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28660 if (!VECTOR_MODE_P (mode))
28662 /* We need to generate a scalar mode mask in this case. */
28663 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28664 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28665 mask = gen_reg_rtx (mode);
28666 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied masks mask the sign bit OUT, so invert first. */
28670 mask = gen_rtx_NOT (mode, mask);
28671 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28672 gen_rtx_AND (mode, mask, sign)));
28673 emit_insn (gen_rtx_SET (VOIDmode, result,
28674 gen_rtx_IOR (mode, abs_value, sgn)));
28677 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28678 mask for masking out the sign-bit is stored in *SMASK, if that is
/* fabs via bitwise AND with the inverted sign-bit mask; the mask is
   narrowed to scalar with a VEC_SELECT when MODE is not a vector. */
28681 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28683 enum machine_mode mode = GET_MODE (op0);
28686 xa = gen_reg_rtx (mode);
28687 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28688 if (!VECTOR_MODE_P (mode))
28690 /* We need to generate a scalar mode mask in this case. */
28691 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28692 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28693 mask = gen_reg_rtx (mode);
28694 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28696 emit_insn (gen_rtx_SET (VOIDmode, xa,
28697 gen_rtx_AND (mode, op0, mask)));
28705 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28706 swapping the operands if SWAP_OPERANDS is true. The expanded
28707 code is a forward jump to a newly created label in case the
28708 comparison is true. The generated label rtx is returned. */
/* Builds a CCFPUmode compare (unordered-aware FP flags) and a
   conditional jump to a fresh label; caller emits the label and is
   responsible for LABEL_NUSES. */
28710 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28711 bool swap_operands)
28722 label = gen_label_rtx ();
28723 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28724 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28725 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28726 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28727 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28728 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28729 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28730 JUMP_LABEL (tmp) = label;
28735 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28736 using comparison code CODE. Operands are swapped for the comparison if
28737 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emits cmpsd/cmpss (all-ones / all-zeros per-element mask) chosen by
   scalar mode: DFmode uses the SSE2 pattern, otherwise the SSE one. */
28739 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28740 bool swap_operands)
28742 enum machine_mode mode = GET_MODE (op0);
28743 rtx mask = gen_reg_rtx (mode);
28752 if (mode == DFmode)
28753 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28754 gen_rtx_fmt_ee (code, mode, op0, op1)));
28756 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28757 gen_rtx_fmt_ee (code, mode, op0, op1)));
28762 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28763 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2^52 for DFmode, 2^23 for SFmode -- adding then subtracting this
   constant rounds a magnitude-limited FP value to an integer. */
28765 ix86_gen_TWO52 (enum machine_mode mode)
28767 REAL_VALUE_TYPE TWO52r;
28770 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28771 TWO52 = const_double_from_real_value (TWO52r, mode);
28772 TWO52 = force_reg (mode, TWO52);
28777 /* Expand SSE sequence for computing lround from OP1 storing
/* round-to-nearest-int: add copysign(nextafter(0.5, 0.0), op1) and
   truncate.  nextafter avoids the halfway value rounding the wrong
   way after the addition. */
28780 ix86_expand_lround (rtx op0, rtx op1)
28782 /* C code for the stuff we're doing below:
28783 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28786 enum machine_mode mode = GET_MODE (op1);
28787 const struct real_format *fmt;
28788 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28791 /* load nextafter (0.5, 0.0) */
28792 fmt = REAL_MODE_FORMAT (mode);
28793 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28794 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28796 /* adj = copysign (0.5, op1) */
28797 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28798 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28800 /* adj = op1 + adj */
28801 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28803 /* op0 = (imode)adj */
28804 expand_fix (op0, adj, 0);
28807 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
/* (The original comment said "lround" -- copy/paste slip; this
   expands lfloor when DO_FLOOR, lceil otherwise.)  Truncate to
   integer, then adjust by 1 when the truncation went the wrong way,
   detected by comparing the round-tripped float to the input. */
28810 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28812 /* C code for the stuff we're doing below (for do_floor):
28814 xi -= (double)xi > op1 ? 1 : 0;
28817 enum machine_mode fmode = GET_MODE (op1);
28818 enum machine_mode imode = GET_MODE (op0);
28819 rtx ireg, freg, label, tmp;
28821 /* reg = (long)op1 */
28822 ireg = gen_reg_rtx (imode);
28823 expand_fix (ireg, op1, 0);
28825 /* freg = (double)reg */
28826 freg = gen_reg_rtx (fmode);
28827 expand_float (freg, ireg, 0);
28829 /* ireg = (freg > op1) ? ireg - 1 : ireg */
28830 label = ix86_expand_sse_compare_and_jump (UNLE,
28831 freg, op1, !do_floor);
28832 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28833 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28834 emit_move_insn (ireg, tmp);
28836 emit_label (label);
28837 LABEL_NUSES (label) = 1;
28839 emit_move_insn (op0, ireg);
28842 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28843 result in OPERAND0. */
/* Uses the TWO52 add/subtract trick on |x|; values >= 2^52 are
   already integral and skip the adjustment.  The copysign restores
   the original sign (preserving -0.0). */
28845 ix86_expand_rint (rtx operand0, rtx operand1)
28847 /* C code for the stuff we're doing below:
28848 xa = fabs (operand1);
28849 if (!isless (xa, 2**52))
28851 xa = xa + 2**52 - 2**52;
28852 return copysign (xa, operand1);
28854 enum machine_mode mode = GET_MODE (operand0);
28855 rtx res, xa, label, TWO52, mask;
28857 res = gen_reg_rtx (mode);
28858 emit_move_insn (res, operand1);
28860 /* xa = abs (operand1) */
28861 xa = ix86_expand_sse_fabs (res, &mask);
28863 /* if (!isless (xa, TWO52)) goto label; */
28864 TWO52 = ix86_gen_TWO52 (mode);
28865 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28867 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28868 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28870 ix86_sse_copysign_to_positive (res, xa, res, mask);
28872 emit_label (label);
28873 LABEL_NUSES (label) = 1;
28875 emit_move_insn (operand0, res);
28878 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 32-bit-safe variant: rounds via the TWO52 trick instead of a DImode
   fix/float round trip, then compensates by +-1 using a compare mask
   ANDed with 1.0/-1.0.  DO_FLOOR selects floor vs ceil. */
28881 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28883 /* C code for the stuff we expand below.
28884 double xa = fabs (x), x2;
28885 if (!isless (xa, TWO52))
28887 xa = xa + TWO52 - TWO52;
28888 x2 = copysign (xa, x);
28897 enum machine_mode mode = GET_MODE (operand0);
28898 rtx xa, TWO52, tmp, label, one, res, mask;
28900 TWO52 = ix86_gen_TWO52 (mode);
28902 /* Temporary for holding the result, initialized to the input
28903 operand to ease control flow. */
28904 res = gen_reg_rtx (mode);
28905 emit_move_insn (res, operand1);
28907 /* xa = abs (operand1) */
28908 xa = ix86_expand_sse_fabs (res, &mask);
28910 /* if (!isless (xa, TWO52)) goto label; */
28911 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28913 /* xa = xa + TWO52 - TWO52; */
28914 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28915 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28917 /* xa = copysign (xa, operand1) */
28918 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28920 /* generate 1.0 or -1.0 */
28921 one = force_reg (mode,
28922 const_double_from_real_value (do_floor
28923 ? dconst1 : dconstm1, mode));
28925 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28926 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28927 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28928 gen_rtx_AND (mode, one, tmp)));
28929 /* We always need to subtract here to preserve signed zero. */
28930 tmp = expand_simple_binop (mode, MINUS,
28931 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28932 emit_move_insn (res, tmp);
28934 emit_label (label);
28935 LABEL_NUSES (label) = 1;
28937 emit_move_insn (operand0, res);
28940 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 64-bit-capable variant: rounds by a fix/float round trip (DImode
   for double, SImode for float), then compensates by 1 where the
   truncation went the wrong way.  copysign pass restores -0.0 only
   when signed zeros matter. */
28943 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28945 /* C code for the stuff we expand below.
28946 double xa = fabs (x), x2;
28947 if (!isless (xa, TWO52))
28949 x2 = (double)(long)x;
28956 if (HONOR_SIGNED_ZEROS (mode))
28957 return copysign (x2, x);
28960 enum machine_mode mode = GET_MODE (operand0);
28961 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28963 TWO52 = ix86_gen_TWO52 (mode);
28965 /* Temporary for holding the result, initialized to the input
28966 operand to ease control flow. */
28967 res = gen_reg_rtx (mode);
28968 emit_move_insn (res, operand1);
28970 /* xa = abs (operand1) */
28971 xa = ix86_expand_sse_fabs (res, &mask);
28973 /* if (!isless (xa, TWO52)) goto label; */
28974 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28976 /* xa = (double)(long)x */
28977 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28978 expand_fix (xi, res, 0);
28979 expand_float (xa, xi, 0);
28982 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28984 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28985 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28986 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28987 gen_rtx_AND (mode, one, tmp)));
28988 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28989 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28990 emit_move_insn (res, tmp);
28992 if (HONOR_SIGNED_ZEROS (mode))
28993 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28995 emit_label (label);
28996 LABEL_NUSES (label) = 1;
28998 emit_move_insn (operand0, res);
29001 /* Expand SSE sequence for computing round from OPERAND1 storing
29002 into OPERAND0. Sequence that works without relying on DImode truncation
29003 via cvttsd2siq that is only available on 64bit targets. */
/* Round-to-nearest using only 32-bit-safe operations: TWO52 trick on
   |x| gives xa2, then the fractional part dxa = xa2 - xa decides the
   +-1 compensation.  Sign is restored with copysign so -0.0 survives. */
29005 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29007 /* C code for the stuff we expand below.
29008 double xa = fabs (x), xa2, x2;
29009 if (!isless (xa, TWO52))
29011 Using the absolute value and copying back sign makes
29012 -0.0 -> -0.0 correct.
29013 xa2 = xa + TWO52 - TWO52;
29018 else if (dxa > 0.5)
29020 x2 = copysign (xa2, x);
29023 enum machine_mode mode = GET_MODE (operand0);
29024 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29026 TWO52 = ix86_gen_TWO52 (mode);
29028 /* Temporary for holding the result, initialized to the input
29029 operand to ease control flow. */
29030 res = gen_reg_rtx (mode);
29031 emit_move_insn (res, operand1);
29033 /* xa = abs (operand1) */
29034 xa = ix86_expand_sse_fabs (res, &mask);
29036 /* if (!isless (xa, TWO52)) goto label; */
29037 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29039 /* xa2 = xa + TWO52 - TWO52; */
29040 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29041 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29043 /* dxa = xa2 - xa; */
29044 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29046 /* generate 0.5, 1.0 and -0.5 */
29047 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29048 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29049 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29053 tmp = gen_reg_rtx (mode);
29054 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29055 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29056 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29057 gen_rtx_AND (mode, one, tmp)));
29058 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29059 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29060 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29061 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29062 gen_rtx_AND (mode, one, tmp)));
29063 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29065 /* res = copysign (xa2, operand1) */
29066 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29068 emit_label (label);
29069 LABEL_NUSES (label) = 1;
29071 emit_move_insn (operand0, res);
29074 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc via a fix/float round trip (truncating conversion already
   rounds toward zero); copysign restores -0.0 when signed zeros are
   honored.  Inputs >= 2^52 (2^23 for SF) are already integral and
   skip the work. */
29077 ix86_expand_trunc (rtx operand0, rtx operand1)
29079 /* C code for SSE variant we expand below.
29080 double xa = fabs (x), x2;
29081 if (!isless (xa, TWO52))
29083 x2 = (double)(long)x;
29084 if (HONOR_SIGNED_ZEROS (mode))
29085 return copysign (x2, x);
29088 enum machine_mode mode = GET_MODE (operand0);
29089 rtx xa, xi, TWO52, label, res, mask;
29091 TWO52 = ix86_gen_TWO52 (mode);
29093 /* Temporary for holding the result, initialized to the input
29094 operand to ease control flow. */
29095 res = gen_reg_rtx (mode);
29096 emit_move_insn (res, operand1);
29098 /* xa = abs (operand1) */
29099 xa = ix86_expand_sse_fabs (res, &mask);
29101 /* if (!isless (xa, TWO52)) goto label; */
29102 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29104 /* x = (double)(long)x */
29105 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29106 expand_fix (xi, res, 0);
29107 expand_float (res, xi, 0);
29109 if (HONOR_SIGNED_ZEROS (mode))
29110 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29112 emit_label (label);
29113 LABEL_NUSES (label) = 1;
29115 emit_move_insn (operand0, res);
29118 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* 32-bit-safe trunc: round |x| with the TWO52 trick, subtract 1 where
   the rounding overshot (result > |x|), then reattach the sign. */
29121 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29123 enum machine_mode mode = GET_MODE (operand0);
29124 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29126 /* C code for SSE variant we expand below.
29127 double xa = fabs (x), x2;
29128 if (!isless (xa, TWO52))
29130 xa2 = xa + TWO52 - TWO52;
29134 x2 = copysign (xa2, x);
29138 TWO52 = ix86_gen_TWO52 (mode);
29140 /* Temporary for holding the result, initialized to the input
29141 operand to ease control flow. */
29142 res = gen_reg_rtx (mode);
29143 emit_move_insn (res, operand1);
29145 /* xa = abs (operand1) */
29146 xa = ix86_expand_sse_fabs (res, &smask);
29148 /* if (!isless (xa, TWO52)) goto label; */
29149 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29151 /* res = xa + TWO52 - TWO52; */
29152 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29153 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29154 emit_move_insn (res, tmp);
29157 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29159 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29160 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29161 emit_insn (gen_rtx_SET (VOIDmode, mask,
29162 gen_rtx_AND (mode, mask, one)));
29163 tmp = expand_simple_binop (mode, MINUS,
29164 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29165 emit_move_insn (res, tmp);
29167 /* res = copysign (res, operand1) */
29168 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29170 emit_label (label);
29171 LABEL_NUSES (label) = 1;
29173 emit_move_insn (operand0, res);
29176 /* Expand SSE sequence for computing round from OPERAND1 storing
29179 ix86_expand_round (rtx operand0, rtx operand1)
29181 /* C code for the stuff we're doing below:
29182 double xa = fabs (x);
29183 if (!isless (xa, TWO52))
29185 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29186 return copysign (xa, x);
29188 enum machine_mode mode = GET_MODE (operand0);
29189 rtx res, TWO52, xa, label, xi, half, mask;
29190 const struct real_format *fmt;
29191 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29193 /* Temporary for holding the result, initialized to the input
29194 operand to ease control flow. */
29195 res = gen_reg_rtx (mode);
29196 emit_move_insn (res, operand1);
29198 TWO52 = ix86_gen_TWO52 (mode);
29199 xa = ix86_expand_sse_fabs (res, &mask);
29200 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29202 /* load nextafter (0.5, 0.0) */
29203 fmt = REAL_MODE_FORMAT (mode);
29204 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29205 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29207 /* xa = xa + 0.5 */
29208 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29209 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29211 /* xa = (double)(int64_t)xa */
29212 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29213 expand_fix (xi, xa, 0);
29214 expand_float (xa, xi, 0);
29216 /* res = copysign (xa, operand1) */
29217 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29219 emit_label (label);
29220 LABEL_NUSES (label) = 1;
29222 emit_move_insn (operand0, res);
29226 /* Validate whether a SSE5 instruction is valid or not.
29227 OPERANDS is the array of operands.
29228 NUM is the number of operands.
29229 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29230 NUM_MEMORY is the maximum number of memory operands to accept.
29231 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29234 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29235 bool uses_oc0, int num_memory, bool commutative)
29241 /* Count the number of memory arguments */
29244 for (i = 0; i < num; i++)
29246 enum machine_mode mode = GET_MODE (operands[i]);
29247 if (register_operand (operands[i], mode))
29250 else if (memory_operand (operands[i], mode))
29252 mem_mask |= (1 << i);
29258 rtx pattern = PATTERN (insn);
29260 /* allow 0 for pcmov */
29261 if (GET_CODE (pattern) != SET
29262 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29264 || operands[i] != CONST0_RTX (mode))
29269 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29270 a memory operation. */
29271 if (num_memory < 0)
29273 num_memory = -num_memory;
29274 if ((mem_mask & (1 << (num-1))) != 0)
29276 mem_mask &= ~(1 << (num-1));
29281 /* If there were no memory operations, allow the insn */
29285 /* Do not allow the destination register to be a memory operand. */
29286 else if (mem_mask & (1 << 0))
29289 /* If there are too many memory operations, disallow the instruction. While
29290 the hardware only allows 1 memory reference, before register allocation
29291 for some insns, we allow two memory operations sometimes in order to allow
29292 code like the following to be optimized:
29294 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29296 or similar cases that are vectorized into using the fmaddss
29298 else if (mem_count > num_memory)
29301 /* Don't allow more than one memory operation if not optimizing. */
29302 else if (mem_count > 1 && !optimize)
29305 else if (num == 4 && mem_count == 1)
29307 /* formats (destination is the first argument), example fmaddss:
29308 xmm1, xmm1, xmm2, xmm3/mem
29309 xmm1, xmm1, xmm2/mem, xmm3
29310 xmm1, xmm2, xmm3/mem, xmm1
29311 xmm1, xmm2/mem, xmm3, xmm1 */
29313 return ((mem_mask == (1 << 1))
29314 || (mem_mask == (1 << 2))
29315 || (mem_mask == (1 << 3)));
29317 /* format, example pmacsdd:
29318 xmm1, xmm2, xmm3/mem, xmm1 */
29320 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29322 return (mem_mask == (1 << 2));
29325 else if (num == 4 && num_memory == 2)
29327 /* If there are two memory operations, we can load one of the memory ops
29328 into the destination register. This is for optimizing the
29329 multiply/add ops, which the combiner has optimized both the multiply
29330 and the add insns to have a memory operation. We have to be careful
29331 that the destination doesn't overlap with the inputs. */
29332 rtx op0 = operands[0];
29334 if (reg_mentioned_p (op0, operands[1])
29335 || reg_mentioned_p (op0, operands[2])
29336 || reg_mentioned_p (op0, operands[3]))
29339 /* formats (destination is the first argument), example fmaddss:
29340 xmm1, xmm1, xmm2, xmm3/mem
29341 xmm1, xmm1, xmm2/mem, xmm3
29342 xmm1, xmm2, xmm3/mem, xmm1
29343 xmm1, xmm2/mem, xmm3, xmm1
29345 For the oc0 case, we will load either operands[1] or operands[3] into
29346 operands[0], so any combination of 2 memory operands is ok. */
29350 /* format, example pmacsdd:
29351 xmm1, xmm2, xmm3/mem, xmm1
29353 For the integer multiply/add instructions be more restrictive and
29354 require operands[2] and operands[3] to be the memory operands. */
29356 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29358 return (mem_mask == ((1 << 2) | (1 << 3)));
29361 else if (num == 3 && num_memory == 1)
29363 /* formats, example protb:
29364 xmm1, xmm2, xmm3/mem
29365 xmm1, xmm2/mem, xmm3 */
29367 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29369 /* format, example comeq:
29370 xmm1, xmm2, xmm3/mem */
29372 return (mem_mask == (1 << 2));
29376 gcc_unreachable ();
29382 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29383 hardware will allow by using the destination register to load one of the
29384 memory operations. Presently this is used by the multiply/add routines to
29385 allow 2 memory references. */
29388 ix86_expand_sse5_multiple_memory (rtx operands[],
29390 enum machine_mode mode)
29392 rtx op0 = operands[0];
29394 || memory_operand (op0, mode)
29395 || reg_mentioned_p (op0, operands[1])
29396 || reg_mentioned_p (op0, operands[2])
29397 || reg_mentioned_p (op0, operands[3]))
29398 gcc_unreachable ();
29400 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29401 the destination register. */
29402 if (memory_operand (operands[1], mode))
29404 emit_move_insn (op0, operands[1]);
29407 else if (memory_operand (operands[3], mode))
29409 emit_move_insn (op0, operands[3]);
29413 gcc_unreachable ();
29419 /* Table of valid machine attributes. */
29420 static const struct attribute_spec ix86_attribute_table[] =
29422 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29423 /* Stdcall attribute says callee is responsible for popping arguments
29424 if they are not variable. */
29425 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29426 /* Fastcall attribute says callee is responsible for popping arguments
29427 if they are not variable. */
29428 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29429 /* Cdecl attribute says the callee is a normal C declaration */
29430 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29431 /* Regparm attribute specifies how many integer arguments are to be
29432 passed in registers. */
29433 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29434 /* Sseregparm attribute says we are using x86_64 calling conventions
29435 for FP arguments. */
29436 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29437 /* force_align_arg_pointer says this function realigns the stack at entry. */
29438 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29439 false, true, true, ix86_handle_cconv_attribute },
29440 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29441 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29442 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29443 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29445 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29446 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29447 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29448 SUBTARGET_ATTRIBUTE_TABLE,
29450 /* ms_abi and sysv_abi calling convention function attributes. */
29451 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29452 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29454 { NULL, 0, 0, false, false, false, NULL }
29457 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29459 x86_builtin_vectorization_cost (bool runtime_test)
29461 /* If the branch of the runtime test is taken - i.e. - the vectorized
29462 version is skipped - this incurs a misprediction cost (because the
29463 vectorized version is expected to be the fall-through). So we subtract
29464 the latency of a mispredicted branch from the costs that are incured
29465 when the vectorized version is executed.
29467 TODO: The values in individual target tables have to be tuned or new
29468 fields may be needed. For eg. on K8, the default branch path is the
29469 not-taken path. If the taken path is predicted correctly, the minimum
29470 penalty of going down the taken-path is 1 cycle. If the taken-path is
29471 not predicted correctly, then the minimum penalty is 10 cycles. */
29475 return (-(ix86_cost->cond_taken_branch_cost));
29481 /* This function returns the calling abi specific va_list type node.
29482 It returns the FNDECL specific va_list type. */
29485 ix86_fn_abi_va_list (tree fndecl)
29488 return va_list_type_node;
29489 gcc_assert (fndecl != NULL_TREE);
29491 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29492 return ms_va_list_type_node;
29494 return sysv_va_list_type_node;
29497 /* Returns the canonical va_list type specified by TYPE. If there
29498 is no valid TYPE provided, it return NULL_TREE. */
29501 ix86_canonical_va_list_type (tree type)
29505 /* Resolve references and pointers to va_list type. */
29506 if (INDIRECT_REF_P (type))
29507 type = TREE_TYPE (type);
29508 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29509 type = TREE_TYPE (type);
29513 wtype = va_list_type_node;
29514 gcc_assert (wtype != NULL_TREE);
29516 if (TREE_CODE (wtype) == ARRAY_TYPE)
29518 /* If va_list is an array type, the argument may have decayed
29519 to a pointer type, e.g. by being passed to another function.
29520 In that case, unwrap both types so that we can compare the
29521 underlying records. */
29522 if (TREE_CODE (htype) == ARRAY_TYPE
29523 || POINTER_TYPE_P (htype))
29525 wtype = TREE_TYPE (wtype);
29526 htype = TREE_TYPE (htype);
29529 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29530 return va_list_type_node;
29531 wtype = sysv_va_list_type_node;
29532 gcc_assert (wtype != NULL_TREE);
29534 if (TREE_CODE (wtype) == ARRAY_TYPE)
29536 /* If va_list is an array type, the argument may have decayed
29537 to a pointer type, e.g. by being passed to another function.
29538 In that case, unwrap both types so that we can compare the
29539 underlying records. */
29540 if (TREE_CODE (htype) == ARRAY_TYPE
29541 || POINTER_TYPE_P (htype))
29543 wtype = TREE_TYPE (wtype);
29544 htype = TREE_TYPE (htype);
29547 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29548 return sysv_va_list_type_node;
29549 wtype = ms_va_list_type_node;
29550 gcc_assert (wtype != NULL_TREE);
29552 if (TREE_CODE (wtype) == ARRAY_TYPE)
29554 /* If va_list is an array type, the argument may have decayed
29555 to a pointer type, e.g. by being passed to another function.
29556 In that case, unwrap both types so that we can compare the
29557 underlying records. */
29558 if (TREE_CODE (htype) == ARRAY_TYPE
29559 || POINTER_TYPE_P (htype))
29561 wtype = TREE_TYPE (wtype);
29562 htype = TREE_TYPE (htype);
29565 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29566 return ms_va_list_type_node;
29569 return std_canonical_va_list_type (type);
29572 /* Iterate through the target-specific builtin types for va_list.
29573 IDX denotes the iterator, *PTREE is set to the result type of
29574 the va_list builtin, and *PNAME to its internal type.
29575 Returns zero if there is no element for this index, otherwise
29576 IDX should be increased upon the next call.
29577 Note, do not iterate a base builtin's name like __builtin_va_list.
29578 Used from c_common_nodes_and_builtins. */
29581 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29587 *ptree = ms_va_list_type_node;
29588 *pname = "__builtin_ms_va_list";
29591 *ptree = sysv_va_list_type_node;
29592 *pname = "__builtin_sysv_va_list";
29600 /* Initialize the GCC target structure. */
29601 #undef TARGET_RETURN_IN_MEMORY
29602 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29604 #undef TARGET_ATTRIBUTE_TABLE
29605 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29606 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29607 # undef TARGET_MERGE_DECL_ATTRIBUTES
29608 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29611 #undef TARGET_COMP_TYPE_ATTRIBUTES
29612 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29614 #undef TARGET_INIT_BUILTINS
29615 #define TARGET_INIT_BUILTINS ix86_init_builtins
29616 #undef TARGET_EXPAND_BUILTIN
29617 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29619 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29620 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29621 ix86_builtin_vectorized_function
29623 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29624 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29626 #undef TARGET_BUILTIN_RECIPROCAL
29627 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29629 #undef TARGET_ASM_FUNCTION_EPILOGUE
29630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29632 #undef TARGET_ENCODE_SECTION_INFO
29633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29639 #undef TARGET_ASM_OPEN_PAREN
29640 #define TARGET_ASM_OPEN_PAREN ""
29641 #undef TARGET_ASM_CLOSE_PAREN
29642 #define TARGET_ASM_CLOSE_PAREN ""
29644 #undef TARGET_ASM_ALIGNED_HI_OP
29645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29646 #undef TARGET_ASM_ALIGNED_SI_OP
29647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29649 #undef TARGET_ASM_ALIGNED_DI_OP
29650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29653 #undef TARGET_ASM_UNALIGNED_HI_OP
29654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29655 #undef TARGET_ASM_UNALIGNED_SI_OP
29656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29657 #undef TARGET_ASM_UNALIGNED_DI_OP
29658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29660 #undef TARGET_SCHED_ADJUST_COST
29661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29662 #undef TARGET_SCHED_ISSUE_RATE
29663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29666 ia32_multipass_dfa_lookahead
29668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29672 #undef TARGET_HAVE_TLS
29673 #define TARGET_HAVE_TLS true
29675 #undef TARGET_CANNOT_FORCE_CONST_MEM
29676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29680 #undef TARGET_DELEGITIMIZE_ADDRESS
29681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29683 #undef TARGET_MS_BITFIELD_LAYOUT_P
29684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29687 #undef TARGET_BINDS_LOCAL_P
29688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29690 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29691 #undef TARGET_BINDS_LOCAL_P
29692 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29695 #undef TARGET_ASM_OUTPUT_MI_THUNK
29696 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29697 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29698 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29700 #undef TARGET_ASM_FILE_START
29701 #define TARGET_ASM_FILE_START x86_file_start
29703 #undef TARGET_DEFAULT_TARGET_FLAGS
29704 #define TARGET_DEFAULT_TARGET_FLAGS \
29706 | TARGET_SUBTARGET_DEFAULT \
29707 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29709 #undef TARGET_HANDLE_OPTION
29710 #define TARGET_HANDLE_OPTION ix86_handle_option
29712 #undef TARGET_RTX_COSTS
29713 #define TARGET_RTX_COSTS ix86_rtx_costs
29714 #undef TARGET_ADDRESS_COST
29715 #define TARGET_ADDRESS_COST ix86_address_cost
29717 #undef TARGET_FIXED_CONDITION_CODE_REGS
29718 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29719 #undef TARGET_CC_MODES_COMPATIBLE
29720 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29722 #undef TARGET_MACHINE_DEPENDENT_REORG
29723 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29725 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29726 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29728 #undef TARGET_BUILD_BUILTIN_VA_LIST
29729 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29731 #undef TARGET_FN_ABI_VA_LIST
29732 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29734 #undef TARGET_CANONICAL_VA_LIST_TYPE
29735 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29737 #undef TARGET_EXPAND_BUILTIN_VA_START
29738 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29740 #undef TARGET_MD_ASM_CLOBBERS
29741 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29743 #undef TARGET_PROMOTE_PROTOTYPES
29744 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29745 #undef TARGET_STRUCT_VALUE_RTX
29746 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29747 #undef TARGET_SETUP_INCOMING_VARARGS
29748 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29749 #undef TARGET_MUST_PASS_IN_STACK
29750 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29751 #undef TARGET_PASS_BY_REFERENCE
29752 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29753 #undef TARGET_INTERNAL_ARG_POINTER
29754 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29755 #undef TARGET_UPDATE_STACK_BOUNDARY
29756 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29757 #undef TARGET_GET_DRAP_RTX
29758 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29759 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29760 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29761 #undef TARGET_STRICT_ARGUMENT_NAMING
29762 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29764 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29765 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29767 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29768 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29770 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29771 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29773 #undef TARGET_C_MODE_FOR_SUFFIX
29774 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29782 #undef TARGET_INSERT_ATTRIBUTES
29783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29786 #undef TARGET_MANGLE_TYPE
29787 #define TARGET_MANGLE_TYPE ix86_mangle_type
29789 #undef TARGET_STACK_PROTECT_FAIL
29790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29792 #undef TARGET_FUNCTION_VALUE
29793 #define TARGET_FUNCTION_VALUE ix86_function_value
29795 #undef TARGET_SECONDARY_RELOAD
29796 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29798 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29799 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29801 #undef TARGET_SET_CURRENT_FUNCTION
29802 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29804 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29805 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29807 #undef TARGET_OPTION_SAVE
29808 #define TARGET_OPTION_SAVE ix86_function_specific_save
29810 #undef TARGET_OPTION_RESTORE
29811 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29813 #undef TARGET_OPTION_PRINT
29814 #define TARGET_OPTION_PRINT ix86_function_specific_print
29816 #undef TARGET_OPTION_CAN_INLINE_P
29817 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29819 #undef TARGET_EXPAND_TO_RTL_HOOK
29820 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
29822 struct gcc_target targetm = TARGET_INITIALIZER;
29824 #include "gt-i386.h"