1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declarations of static helpers defined later in this file.  */
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Fallback definition; a target header may have provided its own
   CHECK_STACK_LIMIT before this point (hence the #ifndef guard).
   NOTE(review): the matching #endif is on a line not visible in this
   excerpt.  */
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
65 /* Return index of given mode in mult and division cost tables. */
/* Maps QI/HI/SI/DImode to indices 0-3 of the per-mode cost arrays below.
   NOTE(review): the trailing default arm of this macro (the "other"
   index) is on a line not visible in this excerpt.  */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Size-tuning cost scale: N "bytes" of code, so an add costs 2.  */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy entry: always fall back to a libcall.
   Used for the table slot (32-bit vs 64-bit) a CPU tuning does not
   provide.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size rather than speed: every
   entry is on the COSTS_N_BYTES scale (an add = 2 bytes), so the
   "cheapest" code is the smallest.  Rep-prefixed 1-byte stringops are
   chosen because they encode compactly.
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
/* Cost table for the original 386 (speed tuning, COSTS_N_INSNS scale):
   instruction latencies, register-move and memory costs, cache/prefetch
   parameters, stringop strategy and vectorizer cost knobs.
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
/* Cost table for the 486 (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Pentium (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* Cost table for the PentiumPro (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt, and the stringop
   comment below is truncated mid-sentence; the visible lines are
   preserved byte-for-byte.  */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
/* Cost table for Geode (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
/* Cost table for the K6 (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Athlon (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
/* Cost table for the K8 (speed tuning, COSTS_N_INSNS scale).  Note the
   distinct 32-bit and 64-bit stringop strategy entries — unlike older
   tables K8 does not use DUMMY_STRINGOP_ALGS for the 64-bit slot.
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
/* Cost table for AMD Family 10h (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace) are not visible in this excerpt; in particular the
   MOVD/FSTORE/FADD latency table below has lost its surrounding comment
   delimiters and header lines.  The visible lines are preserved
   byte-for-byte.  */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Pentium 4 (speed tuning, COSTS_N_INSNS scale).
   NOTE(review): some original lines of this initializer (including its
   closing brace and the final "{-1, ...}" entry of the second stringop
   strategy) are not visible in this excerpt; the visible lines are
   preserved byte-for-byte.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Cost table for -mtune=nocona (64-bit-capable NetBurst).
   NOTE(review): as with pentium4_cost above, some member lines appear
   missing relative to the atom/generic tables (no MOVE_RATIO entry
   after the "large" insn field, no branch-cost entry before the FADD
   cost, and the memset table line after the 20480 entry is cut) --
   confirm against upstream i386.c.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy / memset stringop tables: {32-bit strategy, 64-bit strategy}.  */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Cost table for -mtune=core2.
   NOTE(review): the MOVE_RATIO member line that the atom/generic
   tables carry (after the "large" insn field) is not visible here,
   nor is the closing "};" -- possibly dropped in extraction; verify
   field positions against upstream i386.c.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables ({max_size, alg} pairs;
   first the 32-bit, then the 64-bit variant).  */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* Cost table for -mtune=atom.  This table carries the full member
   sequence (including MOVE_RATIO and Branch cost), which is why it is
   the best reference for spotting members missing from the damaged
   tables above.  NOTE(review): the closing "};" is not visible --
   presumably dropped in extraction.  */
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables ({max_size, alg} pairs;
   32-bit variant first, then 64-bit).  */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
1112 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost table for -mtune=generic in 64-bit mode (tuned for Nocona and
   K8, per the comment above).  Only the 64-bit stringop slots are
   populated; the 32-bit slots use DUMMY_STRINGOP_ALGS because this
   table is never used for 32-bit code.  NOTE(review): closing "};"
   not visible -- presumably dropped in extraction.  */
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration lea is 2 cycles and more. With
1117 this cost however our current implementation of synth_mult results in
1118 use of unnecessary temporary registers causing regression on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1164 is increased to perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost table for -mtune=generic in 32-bit mode (tuned for Athlon,
   PPro, Pentium4, Nocona and K8, per the comment above).  Mirror of
   generic64_cost but with only the 32-bit stringop slots populated
   (the 64-bit slots are DUMMY_STRINGOP_ALGS).  NOTE(review): closing
   "};" not visible -- presumably dropped in extraction.  */
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
/* Active cost table; initialized to pentium_cost (defined earlier in
   this file, outside this chunk).  NOTE(review): presumably re-pointed
   to the selected CPU's table during option processing -- the code
   doing that is not visible here; confirm in ix86_option_override.  */
1260 const struct processor_costs *ix86_cost = &pentium_cost;
1262 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together to say which CPUs a given tuning flag applies to.  */
1263 #define m_386 (1<<PROCESSOR_I386)
1264 #define m_486 (1<<PROCESSOR_I486)
1265 #define m_PENT (1<<PROCESSOR_PENTIUM)
1266 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1268 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1269 #define m_CORE2 (1<<PROCESSOR_CORE2)
1270 #define m_ATOM (1<<PROCESSOR_ATOM)
1272 #define m_GEODE (1<<PROCESSOR_GEODE)
1273 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience unions of the single-CPU masks above.  */
1274 #define m_K6_GEODE (m_K6 | m_GEODE)
1275 #define m_K8 (1<<PROCESSOR_K8)
1276 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1277 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1278 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1279 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1281 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1284 /* Generic instruction choice should be common subset of supported CPUs
1285 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1286 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
/* NOTE(review): this is a positional initializer -- entry order must
   match the X86_TUNE_* enum exactly.  Several entries below consist of
   a comment with no following mask line (e.g. ZERO_EXTEND_WITH_AND,
   DOUBLE_WITH_ADD, USE_MOV0, SHIFT1, PROMOTE_QI_REGS), and some mask
   expressions lack a trailing comma/terminator; these lines were most
   likely lost in extraction, which would shift every later entry.
   Confirm against upstream i386.c before trusting positions.  */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
/* NOTE(review): positional initializer keyed to the X86_ARCH_* enum.
   Only the CMOVE entry shows a mask; the CMPXCHG/CMPXCHG8B/XADD/BSWAP
   entries below have comments but no visible mask lines, and the
   array's closing "};" is absent -- likely lost in extraction; confirm
   against upstream i386.c.  */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which -maccumulate-outgoing-args defaults on.
   NOTE(review): the initializer appears cut (no terminating ";" or
   final mask term visible after the m_CORE2 line).  */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* CPUs where the 80387 is always used for math when available.  */
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
/* User-forced stringop strategy (no_stringop = pick automatically).  */
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): some rows of this FIRST_PSEUDO_REGISTER-sized table
   (e.g. the comment lines labelling the FP, SSE, MMX and REX groups,
   and the closing "};") are not visible here -- likely dropped in
   extraction; positions must match the register numbering in i386.h.  */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
/* Maps GCC register numbers to debugger (DBX/stabs) register numbers;
   -1 means "no debugger number".  Closing "};" not visible here.  */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
/* 64-bit analogue of dbx_register_map; note the REX registers get
   real numbers here.  Closing "};" not visible.  */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1729 struct stack_local_entry *next;
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1749 [va_arg registers] (
1750 > to_allocate <- FRAME_POINTER
1762 HOST_WIDE_INT frame;
1764 int outgoing_arguments_size;
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1778 /* Code model option. */
1779 enum cmodel ix86_cmodel;
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1788 /* Which cpu are we scheduling for. */
1789 enum attr_cpu ix86_schedule;
1791 /* Which cpu are we optimizing for. */
1792 enum processor_type ix86_tune;
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1797 /* true if sse prefetch instruction is not NOOP. */
1798 int x86_prefetch_sse;
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1820 /* Alignment for incoming stack boundary in bits specified at
1822 static unsigned int ix86_user_incoming_stack_boundary;
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1830 /* The abi used by target. */
1831 enum calling_abi ix86_abi;
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1836 /* Calling abi specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1843 int ix86_section_threshold = 65536;
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
1847 int internal_label_prefix_len;
1849 /* Fence to use after loop using movnt. */
1852 /* Register class used for passing given 64bit part of the argument.
1853 These represent classes as documented by the PS ABI, with the exception
1854 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1855 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (upper half does contain padding). */
1859 enum x86_64_reg_class
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1874 #define MAX_CLASSES 4
1876 /* Table of constants used by fldpi, fldln2, etc.... */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878 static bool ext_80387_constants_init = 0;
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static int ix86_function_regparm (const_tree, const_tree);
1884 static void ix86_compute_frame_layout (struct ix86_frame *);
1885 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1887 static void ix86_add_new_builtins (int);
1889 enum ix86_function_specific_strings
1891 IX86_FUNCTION_SPECIFIC_ARCH,
1892 IX86_FUNCTION_SPECIFIC_TUNE,
1893 IX86_FUNCTION_SPECIFIC_FPMATH,
1894 IX86_FUNCTION_SPECIFIC_MAX
1897 static char *ix86_target_string (int, int, const char *, const char *,
1898 const char *, bool);
1899 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1900 static void ix86_function_specific_save (struct cl_target_option *);
1901 static void ix86_function_specific_restore (struct cl_target_option *);
1902 static void ix86_function_specific_print (FILE *, int,
1903 struct cl_target_option *);
1904 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1905 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1906 static bool ix86_can_inline_p (tree, tree);
1907 static void ix86_set_current_function (tree);
1909 static enum calling_abi ix86_function_abi (const_tree);
1912 /* The svr4 ABI for the i386 says that records and unions are returned
1914 #ifndef DEFAULT_PCC_STRUCT_RETURN
1915 #define DEFAULT_PCC_STRUCT_RETURN 1
1918 /* Whether -mtune= or -march= were specified */
1919 static int ix86_tune_defaulted;
1920 static int ix86_arch_specified;
1922 /* Bit flags that specify the ISA we are compiling for. */
1923 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1925 /* A mask of ix86_isa_flags that includes bit X if X
1926 was set or cleared on the command line. */
1927 static int ix86_isa_flags_explicit;
1929 /* Define a set of ISAs which are available when a given ISA is
1930 enabled. MMX and SSE ISAs are handled separately. */
1932 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1933 #define OPTION_MASK_ISA_3DNOW_SET \
1934 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1936 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1937 #define OPTION_MASK_ISA_SSE2_SET \
1938 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1939 #define OPTION_MASK_ISA_SSE3_SET \
1940 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1941 #define OPTION_MASK_ISA_SSSE3_SET \
1942 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1943 #define OPTION_MASK_ISA_SSE4_1_SET \
1944 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1945 #define OPTION_MASK_ISA_SSE4_2_SET \
1946 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1947 #define OPTION_MASK_ISA_AVX_SET \
1948 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1949 #define OPTION_MASK_ISA_FMA_SET \
1950 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1952 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1954 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1956 #define OPTION_MASK_ISA_SSE4A_SET \
1957 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1958 #define OPTION_MASK_ISA_SSE5_SET \
1959 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1961 /* AES and PCLMUL need SSE2 because they use xmm registers */
1962 #define OPTION_MASK_ISA_AES_SET \
1963 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1964 #define OPTION_MASK_ISA_PCLMUL_SET \
1965 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1967 #define OPTION_MASK_ISA_ABM_SET \
1968 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1973 /* Define a set of ISAs which aren't available when a given ISA is
1974 disabled. MMX and SSE ISAs are handled separately. */
1976 #define OPTION_MASK_ISA_MMX_UNSET \
1977 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1978 #define OPTION_MASK_ISA_3DNOW_UNSET \
1979 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1982 #define OPTION_MASK_ISA_SSE_UNSET \
1983 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1984 #define OPTION_MASK_ISA_SSE2_UNSET \
1985 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1986 #define OPTION_MASK_ISA_SSE3_UNSET \
1987 (OPTION_MASK_ISA_SSE3 \
1988 | OPTION_MASK_ISA_SSSE3_UNSET \
1989 | OPTION_MASK_ISA_SSE4A_UNSET )
1990 #define OPTION_MASK_ISA_SSSE3_UNSET \
1991 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1992 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1993 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1995 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1996 #define OPTION_MASK_ISA_AVX_UNSET \
1997 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1998 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2000 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2002 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2004 #define OPTION_MASK_ISA_SSE4A_UNSET \
2005 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2006 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2007 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2008 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2009 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2010 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2011 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2012 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2014 /* Vectorization library interface and handlers. */
2015 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2016 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2017 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2019 /* Processor target table, indexed by processor number */
2022 const struct processor_costs *cost; /* Processor costs */
2023 const int align_loop; /* Default alignments. */
2024 const int align_loop_max_skip;
2025 const int align_jump;
2026 const int align_jump_max_skip;
2027 const int align_func;
2030 static const struct ptt processor_target_table[PROCESSOR_max] =
2032 {&i386_cost, 4, 3, 4, 3, 4},
2033 {&i486_cost, 16, 15, 16, 15, 16},
2034 {&pentium_cost, 16, 7, 16, 7, 16},
2035 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2036 {&geode_cost, 0, 0, 0, 0, 0},
2037 {&k6_cost, 32, 7, 32, 7, 32},
2038 {&athlon_cost, 16, 7, 16, 7, 16},
2039 {&pentium4_cost, 0, 0, 0, 0, 0},
2040 {&k8_cost, 16, 7, 16, 7, 16},
2041 {&nocona_cost, 0, 0, 0, 0, 0},
2042 {&core2_cost, 16, 10, 16, 10, 16},
2043 {&generic32_cost, 16, 7, 16, 7, 16},
2044 {&generic64_cost, 16, 10, 16, 10, 16},
2045 {&amdfam10_cost, 32, 24, 32, 7, 32},
2046 {&atom_cost, 16, 7, 16, 7, 16}
2049 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2075 /* Implement TARGET_HANDLE_OPTION. */
2078 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2085 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2098 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2114 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2127 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2128 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2133 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2140 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2146 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2153 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2154 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2159 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2166 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2167 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2172 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2179 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2180 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2185 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2192 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2193 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2198 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2205 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2206 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2211 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2216 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2217 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2228 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2241 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2254 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2260 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2267 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2273 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2280 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2286 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2293 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2299 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2306 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2311 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2312 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2319 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2324 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2325 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2334 /* Return a string the documents the current -m options. The caller is
2335 responsible for freeing the string. */
2338 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2339 const char *fpmath, bool add_nl_p)
2341 struct ix86_target_opts
2343 const char *option; /* option string */
2344 int mask; /* isa mask options */
2347 /* This table is ordered so that options like -msse5 or -msse4.2, which
2348 imply the preceding options, are matched first. */
2349 static struct ix86_target_opts isa_opts[] =
2351 { "-m64", OPTION_MASK_ISA_64BIT },
2352 { "-msse5", OPTION_MASK_ISA_SSE5 },
2353 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2354 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2355 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2356 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2357 { "-msse3", OPTION_MASK_ISA_SSE3 },
2358 { "-msse2", OPTION_MASK_ISA_SSE2 },
2359 { "-msse", OPTION_MASK_ISA_SSE },
2360 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2361 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2362 { "-mmmx", OPTION_MASK_ISA_MMX },
2363 { "-mabm", OPTION_MASK_ISA_ABM },
2364 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2365 { "-maes", OPTION_MASK_ISA_AES },
2366 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2370 static struct ix86_target_opts flag_opts[] =
2372 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2373 { "-m80387", MASK_80387 },
2374 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2375 { "-malign-double", MASK_ALIGN_DOUBLE },
2376 { "-mcld", MASK_CLD },
2377 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2378 { "-mieee-fp", MASK_IEEE_FP },
2379 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2380 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2381 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2382 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2383 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2384 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2385 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2386 { "-mno-red-zone", MASK_NO_RED_ZONE },
2387 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2388 { "-mrecip", MASK_RECIP },
2389 { "-mrtd", MASK_RTD },
2390 { "-msseregparm", MASK_SSEREGPARM },
2391 { "-mstack-arg-probe", MASK_STACK_PROBE },
2392 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2395 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2398 char target_other[40];
2407 memset (opts, '\0', sizeof (opts));
2409 /* Add -march= option. */
2412 opts[num][0] = "-march=";
2413 opts[num++][1] = arch;
2416 /* Add -mtune= option. */
2419 opts[num][0] = "-mtune=";
2420 opts[num++][1] = tune;
2423 /* Pick out the options in isa options. */
2424 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2426 if ((isa & isa_opts[i].mask) != 0)
2428 opts[num++][0] = isa_opts[i].option;
2429 isa &= ~ isa_opts[i].mask;
2433 if (isa && add_nl_p)
2435 opts[num++][0] = isa_other;
2436 sprintf (isa_other, "(other isa: 0x%x)", isa);
2439 /* Add flag options. */
2440 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2442 if ((flags & flag_opts[i].mask) != 0)
2444 opts[num++][0] = flag_opts[i].option;
2445 flags &= ~ flag_opts[i].mask;
2449 if (flags && add_nl_p)
2451 opts[num++][0] = target_other;
2452 sprintf (target_other, "(other flags: 0x%x)", flags); /* was "isa": printed the ISA mask instead of the remaining target flags; compare the "(other isa: ...)" message above, which correctly uses ISA.  */
2455 /* Add -fpmath= option. */
2458 opts[num][0] = "-mfpmath=";
2459 opts[num++][1] = fpmath;
2466 gcc_assert (num < ARRAY_SIZE (opts));
2468 /* Size the string. */
2470 sep_len = (add_nl_p) ? 3 : 1;
2471 for (i = 0; i < num; i++)
2474 for (j = 0; j < 2; j++)
2476 len += strlen (opts[i][j]);
2479 /* Build the string. */
2480 ret = ptr = (char *) xmalloc (len);
2483 for (i = 0; i < num; i++)
2487 for (j = 0; j < 2; j++)
2488 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2495 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2503 for (j = 0; j < 2; j++)
2506 memcpy (ptr, opts[i][j], len2[j]);
2508 line_len += len2[j];
2513 gcc_assert (ret + len >= ptr);
2518 /* Function that is callable from the debugger to print the current
2521 ix86_debug_options (void)
2523 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2524 ix86_arch_string, ix86_tune_string,
2525 ix86_fpmath_string, true);
2529 fprintf (stderr, "%s\n\n", opts);
2533 fprintf (stderr, "<no options>\n\n");
2538 /* Sometimes certain combinations of command options do not make
2539 sense on a particular target machine. You can define a macro
2540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2541 defined, is executed once just after all the command options have
2544 Don't use this macro to turn on various extra optimizations for
2545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2548 override_options (bool main_args_p)
2551 unsigned int ix86_arch_mask, ix86_tune_mask;
2556 /* Comes from final.c -- no real reason to change it. */
2557 #define MAX_CODE_ALIGN 16
2565 PTA_PREFETCH_SSE = 1 << 4,
2567 PTA_3DNOW_A = 1 << 6,
2571 PTA_POPCNT = 1 << 10,
2573 PTA_SSE4A = 1 << 12,
2574 PTA_NO_SAHF = 1 << 13,
2575 PTA_SSE4_1 = 1 << 14,
2576 PTA_SSE4_2 = 1 << 15,
2579 PTA_PCLMUL = 1 << 18,
2586 const char *const name; /* processor name or nickname. */
2587 const enum processor_type processor;
2588 const enum attr_cpu schedule;
2589 const unsigned /*enum pta_flags*/ flags;
2591 const processor_alias_table[] =
2593 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2594 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2595 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2598 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2599 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2602 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2605 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2607 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2609 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 PTA_MMX | PTA_SSE | PTA_SSE2},
2611 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2612 PTA_MMX |PTA_SSE | PTA_SSE2},
2613 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2614 PTA_MMX | PTA_SSE | PTA_SSE2},
2615 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2616 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2617 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2618 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2619 | PTA_CX16 | PTA_NO_SAHF},
2620 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2622 | PTA_SSSE3 | PTA_CX16},
2623 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2624 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2625 | PTA_SSSE3 | PTA_CX16},
2626 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2627 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2628 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2629 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2632 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2633 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2634 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2635 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2636 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2637 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2638 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2639 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2640 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2641 {"x86-64", PROCESSOR_K8, CPU_K8,
2642 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2643 {"k8", PROCESSOR_K8, CPU_K8,
2644 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2645 | PTA_SSE2 | PTA_NO_SAHF},
2646 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2647 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2648 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2649 {"opteron", PROCESSOR_K8, CPU_K8,
2650 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2651 | PTA_SSE2 | PTA_NO_SAHF},
2652 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2653 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2654 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2655 {"athlon64", PROCESSOR_K8, CPU_K8,
2656 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2657 | PTA_SSE2 | PTA_NO_SAHF},
2658 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2660 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2661 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2662 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2663 | PTA_SSE2 | PTA_NO_SAHF},
2664 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2665 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2666 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2667 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2668 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2669 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2670 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2671 0 /* flags are only used for -march switch. */ },
2672 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2673 PTA_64BIT /* flags are only used for -march switch. */ },
2676 int const pta_size = ARRAY_SIZE (processor_alias_table);
2678 /* Set up prefix/suffix so the error messages refer to either the command
2679 line argument, or the attribute(target). */
2688 prefix = "option(\"";
2693 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2694 SUBTARGET_OVERRIDE_OPTIONS;
2697 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2698 SUBSUBTARGET_OVERRIDE_OPTIONS;
2701 /* -fPIC is the default for x86_64. */
2702 if (TARGET_MACHO && TARGET_64BIT)
2705 /* Set the default values for switches whose default depends on TARGET_64BIT
2706 in case they weren't overwritten by command line options. */
2709 /* Mach-O doesn't support omitting the frame pointer for now. */
2710 if (flag_omit_frame_pointer == 2)
2711 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2712 if (flag_asynchronous_unwind_tables == 2)
2713 flag_asynchronous_unwind_tables = 1;
2714 if (flag_pcc_struct_return == 2)
2715 flag_pcc_struct_return = 0;
2719 if (flag_omit_frame_pointer == 2)
2720 flag_omit_frame_pointer = 0;
2721 if (flag_asynchronous_unwind_tables == 2)
2722 flag_asynchronous_unwind_tables = 0;
2723 if (flag_pcc_struct_return == 2)
2724 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2727 /* Need to check -mtune=generic first. */
2728 if (ix86_tune_string)
2730 if (!strcmp (ix86_tune_string, "generic")
2731 || !strcmp (ix86_tune_string, "i686")
2732 /* As special support for cross compilers we read -mtune=native
2733 as -mtune=generic. With native compilers we won't see the
2734 -mtune=native, as it was changed by the driver. */
2735 || !strcmp (ix86_tune_string, "native"))
2738 ix86_tune_string = "generic64";
2740 ix86_tune_string = "generic32";
2742 /* If this call is for setting the option attribute, allow the
2743 generic32/generic64 that was previously set. */
2744 else if (!main_args_p
2745 && (!strcmp (ix86_tune_string, "generic32")
2746 || !strcmp (ix86_tune_string, "generic64")))
2748 else if (!strncmp (ix86_tune_string, "generic", 7))
2749 error ("bad value (%s) for %stune=%s %s",
2750 ix86_tune_string, prefix, suffix, sw);
2754 if (ix86_arch_string)
2755 ix86_tune_string = ix86_arch_string;
2756 if (!ix86_tune_string)
2758 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2759 ix86_tune_defaulted = 1;
2762 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2763 need to use a sensible tune option. */
2764 if (!strcmp (ix86_tune_string, "generic")
2765 || !strcmp (ix86_tune_string, "x86-64")
2766 || !strcmp (ix86_tune_string, "i686"))
2769 ix86_tune_string = "generic64";
2771 ix86_tune_string = "generic32";
2774 if (ix86_stringop_string)
2776 if (!strcmp (ix86_stringop_string, "rep_byte"))
2777 stringop_alg = rep_prefix_1_byte;
2778 else if (!strcmp (ix86_stringop_string, "libcall"))
2779 stringop_alg = libcall;
2780 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2781 stringop_alg = rep_prefix_4_byte;
2782 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2784 /* rep; movq isn't available in 32-bit code. */
2785 stringop_alg = rep_prefix_8_byte;
2786 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2787 stringop_alg = loop_1_byte;
2788 else if (!strcmp (ix86_stringop_string, "loop"))
2789 stringop_alg = loop;
2790 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2791 stringop_alg = unrolled_loop;
2793 error ("bad value (%s) for %sstringop-strategy=%s %s",
2794 ix86_stringop_string, prefix, suffix, sw);
2796 if (!strcmp (ix86_tune_string, "x86-64"))
2797 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2798 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2799 prefix, suffix, prefix, suffix, prefix, suffix);
2801 if (!ix86_arch_string)
2802 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2804 ix86_arch_specified = 1;
2806 if (!strcmp (ix86_arch_string, "generic"))
2807 error ("generic CPU can be used only for %stune=%s %s",
2808 prefix, suffix, sw);
2809 if (!strncmp (ix86_arch_string, "generic", 7))
2810 error ("bad value (%s) for %sarch=%s %s",
2811 ix86_arch_string, prefix, suffix, sw);
2813 /* Validate -mabi= value. */
2814 if (ix86_abi_string)
2816 if (strcmp (ix86_abi_string, "sysv") == 0)
2817 ix86_abi = SYSV_ABI;
2818 else if (strcmp (ix86_abi_string, "ms") == 0)
2821 error ("unknown ABI (%s) for %sabi=%s %s",
2822 ix86_abi_string, prefix, suffix, sw);
2825 ix86_abi = DEFAULT_ABI;
2827 if (ix86_cmodel_string != 0)
2829 if (!strcmp (ix86_cmodel_string, "small"))
2830 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2831 else if (!strcmp (ix86_cmodel_string, "medium"))
2832 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2833 else if (!strcmp (ix86_cmodel_string, "large"))
2834 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2836 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2837 else if (!strcmp (ix86_cmodel_string, "32"))
2838 ix86_cmodel = CM_32;
2839 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2840 ix86_cmodel = CM_KERNEL;
2842 error ("bad value (%s) for %scmodel=%s %s",
2843 ix86_cmodel_string, prefix, suffix, sw);
2847 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2848 use of rip-relative addressing. This eliminates fixups that
2849 would otherwise be needed if this object is to be placed in a
2850 DLL, and is essentially just as efficient as direct addressing. */
2851 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2852 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2853 else if (TARGET_64BIT)
2854 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2856 ix86_cmodel = CM_32;
2858 if (ix86_asm_string != 0)
2861 && !strcmp (ix86_asm_string, "intel"))
2862 ix86_asm_dialect = ASM_INTEL;
2863 else if (!strcmp (ix86_asm_string, "att"))
2864 ix86_asm_dialect = ASM_ATT;
2866 error ("bad value (%s) for %sasm=%s %s",
2867 ix86_asm_string, prefix, suffix, sw);
2869 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2870 error ("code model %qs not supported in the %s bit mode",
2871 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2872 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2873 sorry ("%i-bit mode not compiled in",
2874 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2876 for (i = 0; i < pta_size; i++)
2877 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2879 ix86_schedule = processor_alias_table[i].schedule;
2880 ix86_arch = processor_alias_table[i].processor;
2881 /* Default cpu tuning to the architecture. */
2882 ix86_tune = ix86_arch;
2884 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2885 error ("CPU you selected does not support x86-64 "
2888 if (processor_alias_table[i].flags & PTA_MMX
2889 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2890 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2891 if (processor_alias_table[i].flags & PTA_3DNOW
2892 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2893 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2894 if (processor_alias_table[i].flags & PTA_3DNOW_A
2895 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2896 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2897 if (processor_alias_table[i].flags & PTA_SSE
2898 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2899 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2900 if (processor_alias_table[i].flags & PTA_SSE2
2901 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2902 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2903 if (processor_alias_table[i].flags & PTA_SSE3
2904 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2905 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2906 if (processor_alias_table[i].flags & PTA_SSSE3
2907 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2908 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2909 if (processor_alias_table[i].flags & PTA_SSE4_1
2910 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2911 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2912 if (processor_alias_table[i].flags & PTA_SSE4_2
2913 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2914 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2915 if (processor_alias_table[i].flags & PTA_AVX
2916 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2917 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2918 if (processor_alias_table[i].flags & PTA_FMA
2919 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2920 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2921 if (processor_alias_table[i].flags & PTA_SSE4A
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2923 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2924 if (processor_alias_table[i].flags & PTA_SSE5
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2926 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2927 if (processor_alias_table[i].flags & PTA_ABM
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2929 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2930 if (processor_alias_table[i].flags & PTA_CX16
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2932 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2933 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2935 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2936 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2939 if (processor_alias_table[i].flags & PTA_AES
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2941 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2942 if (processor_alias_table[i].flags & PTA_PCLMUL
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2944 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2945 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2946 x86_prefetch_sse = true;
2952 error ("bad value (%s) for %sarch=%s %s",
2953 ix86_arch_string, prefix, suffix, sw);
2955 ix86_arch_mask = 1u << ix86_arch;
2956 for (i = 0; i < X86_ARCH_LAST; ++i)
2957 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2959 for (i = 0; i < pta_size; i++)
2960 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2962 ix86_schedule = processor_alias_table[i].schedule;
2963 ix86_tune = processor_alias_table[i].processor;
2964 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2966 if (ix86_tune_defaulted)
2968 ix86_tune_string = "x86-64";
2969 for (i = 0; i < pta_size; i++)
2970 if (! strcmp (ix86_tune_string,
2971 processor_alias_table[i].name))
2973 ix86_schedule = processor_alias_table[i].schedule;
2974 ix86_tune = processor_alias_table[i].processor;
2977 error ("CPU you selected does not support x86-64 "
2980 /* Intel CPUs have always interpreted SSE prefetch instructions as
2981 NOPs; so, we can enable SSE prefetch instructions even when
2982 -mtune (rather than -march) points us to a processor that has them.
2983 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2984 higher processors. */
2986 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2987 x86_prefetch_sse = true;
2991 error ("bad value (%s) for %stune=%s %s",
2992 ix86_tune_string, prefix, suffix, sw);
2994 ix86_tune_mask = 1u << ix86_tune;
2995 for (i = 0; i < X86_TUNE_LAST; ++i)
2996 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2999 ix86_cost = &ix86_size_cost;
3001 ix86_cost = processor_target_table[ix86_tune].cost;
3003 /* Arrange to set up i386_stack_locals for all functions. */
3004 init_machine_status = ix86_init_machine_status;
3006 /* Validate -mregparm= value. */
3007 if (ix86_regparm_string)
3010 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3011 i = atoi (ix86_regparm_string);
3012 if (i < 0 || i > REGPARM_MAX)
3013 error ("%sregparm=%d%s is not between 0 and %d",
3014 prefix, i, suffix, REGPARM_MAX);
3019 ix86_regparm = REGPARM_MAX;
3021 /* If the user has provided any of the -malign-* options,
3022 warn and use that value only if -falign-* is not set.
3023 Remove this code in GCC 3.2 or later. */
3024 if (ix86_align_loops_string)
3026 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3027 prefix, suffix, suffix);
3028 if (align_loops == 0)
3030 i = atoi (ix86_align_loops_string);
3031 if (i < 0 || i > MAX_CODE_ALIGN)
3032 error ("%salign-loops=%d%s is not between 0 and %d",
3033 prefix, i, suffix, MAX_CODE_ALIGN);
3035 align_loops = 1 << i;
3039 if (ix86_align_jumps_string)
3041 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3042 prefix, suffix, suffix);
3043 if (align_jumps == 0)
3045 i = atoi (ix86_align_jumps_string);
3046 if (i < 0 || i > MAX_CODE_ALIGN)
3047 error ("%salign-loops=%d%s is not between 0 and %d",
3048 prefix, i, suffix, MAX_CODE_ALIGN);
3050 align_jumps = 1 << i;
3054 if (ix86_align_funcs_string)
3056 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3057 prefix, suffix, suffix);
3058 if (align_functions == 0)
3060 i = atoi (ix86_align_funcs_string);
3061 if (i < 0 || i > MAX_CODE_ALIGN)
3062 error ("%salign-loops=%d%s is not between 0 and %d",
3063 prefix, i, suffix, MAX_CODE_ALIGN);
3065 align_functions = 1 << i;
3069 /* Default align_* from the processor table. */
3070 if (align_loops == 0)
3072 align_loops = processor_target_table[ix86_tune].align_loop;
3073 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3075 if (align_jumps == 0)
3077 align_jumps = processor_target_table[ix86_tune].align_jump;
3078 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3080 if (align_functions == 0)
3082 align_functions = processor_target_table[ix86_tune].align_func;
3085 /* Validate -mbranch-cost= value, or provide default. */
3086 ix86_branch_cost = ix86_cost->branch_cost;
3087 if (ix86_branch_cost_string)
3089 i = atoi (ix86_branch_cost_string);
3091 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3093 ix86_branch_cost = i;
3095 if (ix86_section_threshold_string)
3097 i = atoi (ix86_section_threshold_string);
3099 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3101 ix86_section_threshold = i;
3104 if (ix86_tls_dialect_string)
3106 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3107 ix86_tls_dialect = TLS_DIALECT_GNU;
3108 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3109 ix86_tls_dialect = TLS_DIALECT_GNU2;
3110 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3111 ix86_tls_dialect = TLS_DIALECT_SUN;
3113 error ("bad value (%s) for %stls-dialect=%s %s",
3114 ix86_tls_dialect_string, prefix, suffix, sw);
3117 if (ix87_precision_string)
3119 i = atoi (ix87_precision_string);
3120 if (i != 32 && i != 64 && i != 80)
3121 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3126 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3128 /* Enable by default the SSE and MMX builtins. Do allow the user to
3129 explicitly disable any of these. In particular, disabling SSE and
3130 MMX for kernel code is extremely useful. */
3131 if (!ix86_arch_specified)
3133 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3134 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3137 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3141 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3143 if (!ix86_arch_specified)
3145 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3147 /* i386 ABI does not specify red zone. It still makes sense to use it
3148 when the programmer takes care to keep the stack from being destroyed.  */
3149 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3150 target_flags |= MASK_NO_RED_ZONE;
3153 /* Keep nonleaf frame pointers. */
3154 if (flag_omit_frame_pointer)
3155 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3156 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3157 flag_omit_frame_pointer = 1;
3159 /* If we're doing fast math, we don't care about comparison order
3160 wrt NaNs. This lets us use a shorter comparison sequence. */
3161 if (flag_finite_math_only)
3162 target_flags &= ~MASK_IEEE_FP;
3164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3165 since the insns won't need emulation. */
3166 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3167 target_flags &= ~MASK_NO_FANCY_MATH_387;
3169 /* Likewise, if the target doesn't have a 387, or we've specified
3170 software floating point, don't use 387 inline intrinsics. */
3172 target_flags |= MASK_NO_FANCY_MATH_387;
3174 /* Turn on MMX builtins for -msse. */
3177 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3178 x86_prefetch_sse = true;
3181 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3182 if (TARGET_SSE4_2 || TARGET_ABM)
3183 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3185 /* Validate -mpreferred-stack-boundary= value or default it to
3186 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3187 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3188 if (ix86_preferred_stack_boundary_string)
3190 i = atoi (ix86_preferred_stack_boundary_string);
3191 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3192 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3193 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3195 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3198 /* Set the default value for -mstackrealign. */
3199 if (ix86_force_align_arg_pointer == -1)
3200 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3202 /* Validate -mincoming-stack-boundary= value or default it to
3203 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3204 if (ix86_force_align_arg_pointer)
3205 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3207 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3208 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3209 if (ix86_incoming_stack_boundary_string)
3211 i = atoi (ix86_incoming_stack_boundary_string);
3212 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3214 i, TARGET_64BIT ? 4 : 2);
3217 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 ix86_incoming_stack_boundary
3219 = ix86_user_incoming_stack_boundary;
3223 /* Accept -msseregparm only if at least SSE support is enabled. */
3224 if (TARGET_SSEREGPARM
3226 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3228 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3229 if (ix86_fpmath_string != 0)
3231 if (! strcmp (ix86_fpmath_string, "387"))
3232 ix86_fpmath = FPMATH_387;
3233 else if (! strcmp (ix86_fpmath_string, "sse"))
3237 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3238 ix86_fpmath = FPMATH_387;
3241 ix86_fpmath = FPMATH_SSE;
3243 else if (! strcmp (ix86_fpmath_string, "387,sse")
3244 || ! strcmp (ix86_fpmath_string, "387+sse")
3245 || ! strcmp (ix86_fpmath_string, "sse,387")
3246 || ! strcmp (ix86_fpmath_string, "sse+387")
3247 || ! strcmp (ix86_fpmath_string, "both"))
3251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3252 ix86_fpmath = FPMATH_387;
3254 else if (!TARGET_80387)
3256 warning (0, "387 instruction set disabled, using SSE arithmetics");
3257 ix86_fpmath = FPMATH_SSE;
3260 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3263 error ("bad value (%s) for %sfpmath=%s %s",
3264 ix86_fpmath_string, prefix, suffix, sw);
3267 /* If the i387 is disabled, then do not return values in it. */
3269 target_flags &= ~MASK_FLOAT_RETURNS;
3271 /* Use external vectorized library in vectorizing intrinsics. */
3272 if (ix86_veclibabi_string)
3274 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3275 ix86_veclib_handler = ix86_veclibabi_svml;
3276 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3277 ix86_veclib_handler = ix86_veclibabi_acml;
3279 error ("unknown vectorization library ABI type (%s) for "
3280 "%sveclibabi=%s %s", ix86_veclibabi_string,
3281 prefix, suffix, sw);
3284 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3285 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3287 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3289 /* ??? Unwind info is not correct around the CFG unless either a frame
3290 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3291 unwind info generation to be aware of the CFG and propagating states
3293 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3294 || flag_exceptions || flag_non_call_exceptions)
3295 && flag_omit_frame_pointer
3296 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3298 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3299 warning (0, "unwind tables currently require either a frame pointer "
3300 "or %saccumulate-outgoing-args%s for correctness",
3302 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3305 /* If stack probes are required, the space used for large function
3306 arguments on the stack must also be probed, so enable
3307 -maccumulate-outgoing-args so this happens in the prologue. */
3308 if (TARGET_STACK_PROBE
3309 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3311 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3312 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3313 "for correctness", prefix, suffix);
3314 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3317 /* For sane SSE instruction set generation we need fcomi instruction.
3318 It is safe to enable all CMOVE instructions. */
3322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3326 p = strchr (internal_label_prefix, 'X');
3327 internal_label_prefix_len = p - internal_label_prefix;
3331 /* When scheduling description is not available, disable scheduler pass
3332 so it won't slow down the compilation and make x87 code slower. */
3333 if (!TARGET_SCHEDULE)
3334 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3336 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3337 set_param_value ("simultaneous-prefetches",
3338 ix86_cost->simultaneous_prefetches);
3339 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3340 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3341 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3342 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3343 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3344 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3346 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3347 can be optimized to ap = __builtin_next_arg (0). */
3349 targetm.expand_builtin_va_start = NULL;
3353 ix86_gen_leave = gen_leave_rex64;
3354 ix86_gen_pop1 = gen_popdi1;
3355 ix86_gen_add3 = gen_adddi3;
3356 ix86_gen_sub3 = gen_subdi3;
3357 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3358 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3359 ix86_gen_monitor = gen_sse3_monitor64;
3360 ix86_gen_andsp = gen_anddi3;
3364 ix86_gen_leave = gen_leave;
3365 ix86_gen_pop1 = gen_popsi1;
3366 ix86_gen_add3 = gen_addsi3;
3367 ix86_gen_sub3 = gen_subsi3;
3368 ix86_gen_sub3_carry = gen_subsi3_carry;
3369 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3370 ix86_gen_monitor = gen_sse3_monitor;
3371 ix86_gen_andsp = gen_andsi3;
3375 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3377 target_flags |= MASK_CLD & ~target_flags_explicit;
3380 /* Save the initial options in case the user does function specific options */
3382 target_option_default_node = target_option_current_node
3383 = build_target_option_node ();
3386 /* Save the current options */
/* Copy the current x86 target-option globals into *PTR so they can be
   restored later (used for function-specific target options).  */
3389 ix86_function_specific_save (struct cl_target_option *ptr)
/* The destination fields are narrow; assert each value fits in a byte.  */
3391 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3397 ptr->arch = ix86_arch;
3398 ptr->schedule = ix86_schedule;
3399 ptr->tune = ix86_tune;
3400 ptr->fpmath = ix86_fpmath;
3401 ptr->branch_cost = ix86_branch_cost;
3402 ptr->tune_defaulted = ix86_tune_defaulted;
3403 ptr->arch_specified = ix86_arch_specified;
/* Also save which ISA and target flags were set explicitly.  */
3404 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3405 ptr->target_flags_explicit = target_flags_explicit;
3408 /* Restore the current options from *PTR (inverse of
   ix86_function_specific_save above).  */
3411 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the previous arch/tune so we only rebuild the cached
   feature arrays when they actually change.  */
3413 enum processor_type old_tune = ix86_tune;
3414 enum processor_type old_arch = ix86_arch;
3415 unsigned int ix86_arch_mask, ix86_tune_mask;
3418 ix86_arch = (enum processor_type) ptr->arch;
3419 ix86_schedule = (enum attr_cpu) ptr->schedule;
3420 ix86_tune = (enum processor_type) ptr->tune;
3421 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3422 ix86_branch_cost = ptr->branch_cost;
3423 ix86_tune_defaulted = ptr->tune_defaulted;
3424 ix86_arch_specified = ptr->arch_specified;
3425 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3426 target_flags_explicit = ptr->target_flags_explicit;
3428 /* Recreate the arch feature tests if the arch changed.  */
3429 if (old_arch != ix86_arch)
3431 ix86_arch_mask = 1u << ix86_arch;
3432 for (i = 0; i < X86_ARCH_LAST; ++i)
3433 ix86_arch_features[i]
3434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3437 /* Recreate the tune optimization tests if the tuning target changed.  */
3438 if (old_tune != ix86_tune)
3440 ix86_tune_mask = 1u << ix86_tune;
3441 for (i = 0; i < X86_TUNE_LAST; ++i)
3442 ix86_tune_features[i]
3443 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3447 /* Print the current options to FILE, indented by INDENT columns
   (debug helper for function-specific target options).  */
3450 ix86_function_specific_print (FILE *file, int indent,
3451 struct cl_target_option *ptr)
/* Build a human-readable option string from the saved flags;
   ix86_target_string allocates it, so it is freed below.  */
3454 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3455 NULL, NULL, NULL, false);
3457 fprintf (file, "%*sarch = %d (%s)\n",
/* Only index cpu_names[] when the value is in range.  */
3460 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3461 ? cpu_names[ptr->arch]
3464 fprintf (file, "%*stune = %d (%s)\n",
3467 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3468 ? cpu_names[ptr->tune]
/* fpmath is a bit mask; print each enabled unit.  */
3471 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3472 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3473 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3474 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3478 fprintf (file, "%*s%s\n", indent, "", target_string);
3479 free (target_string);
3484 /* Inner function to process the attribute((target(...))): take one
3485    argument and set the current options from it.  If ARGS is a list,
   recursively process each entry.  String-valued options (arch=,
   tune=, fpmath=) are stored into P_STRINGS; ISA and yes/no options
   update ix86_isa_flags / target_flags directly.  Returns false on a
   bad option.  */
3489 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Helpers for building the option table below: string, length,
   option kind, option enum, and (for yes/no options) flag mask.  */
3494 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3495 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3496 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3497 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3512 enum ix86_opt_type type;
3517 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3518 IX86_ATTR_ISA ("abm", OPT_mabm),
3519 IX86_ATTR_ISA ("aes", OPT_maes),
3520 IX86_ATTR_ISA ("avx", OPT_mavx),
3521 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3522 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3523 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3524 IX86_ATTR_ISA ("sse", OPT_msse),
3525 IX86_ATTR_ISA ("sse2", OPT_msse2),
3526 IX86_ATTR_ISA ("sse3", OPT_msse3),
3527 IX86_ATTR_ISA ("sse4", OPT_msse4),
3528 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3529 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3530 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3531 IX86_ATTR_ISA ("sse5", OPT_msse5),
3532 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3534 /* string options */
3535 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3536 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3537 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3540 IX86_ATTR_YES ("cld",
3544 IX86_ATTR_NO ("fancy-math-387",
3545 OPT_mfancy_math_387,
3546 MASK_NO_FANCY_MATH_387),
3548 IX86_ATTR_NO ("fused-madd",
3550 MASK_NO_FUSED_MADD),
3552 IX86_ATTR_YES ("ieee-fp",
3556 IX86_ATTR_YES ("inline-all-stringops",
3557 OPT_minline_all_stringops,
3558 MASK_INLINE_ALL_STRINGOPS),
3560 IX86_ATTR_YES ("inline-stringops-dynamically",
3561 OPT_minline_stringops_dynamically,
3562 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3564 IX86_ATTR_NO ("align-stringops",
3565 OPT_mno_align_stringops,
3566 MASK_NO_ALIGN_STRINGOPS),
3568 IX86_ATTR_YES ("recip",
3574 /* If this is a list, recurse to get the options.  */
3575 if (TREE_CODE (args) == TREE_LIST)
3579 for (; args; args = TREE_CHAIN (args))
3580 if (TREE_VALUE (args)
3581 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings)
3587 else if (TREE_CODE (args) != STRING_CST)
3590 /* Handle multiple arguments separated by commas.  */
3591 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3593 while (next_optstr && *next_optstr != '\0')
3595 char *p = next_optstr;
3597 char *comma = strchr (next_optstr, ',');
3598 const char *opt_string;
3599 size_t len, opt_len;
3604 enum ix86_opt_type type = ix86_opt_unknown;
/* A comma ends the current option; continue after it next time.  */
3610 len = comma - next_optstr;
3611 next_optstr = comma + 1;
3619 /* Recognize a "no-" prefix, meaning the option is being turned off.  */
3620 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3629 /* Find the option in the table.  */
3632 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3634 type = attrs[i].type;
3635 opt_len = attrs[i].len;
/* String options match on the prefix (they carry a value after it);
   all other kinds require an exact-length match.  */
3636 if (ch == attrs[i].string[0]
3637 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3638 && memcmp (p, attrs[i].string, opt_len) == 0)
3641 mask = attrs[i].mask;
3642 opt_string = attrs[i].string;
3647 /* Process the option.  */
3650 error ("attribute(target(\"%s\")) is unknown", orig_p);
3654 else if (type == ix86_opt_isa)
3655 ix86_handle_option (opt, p, opt_set_p);
3657 else if (type == ix86_opt_yes || type == ix86_opt_no)
3659 if (type == ix86_opt_no)
3660 opt_set_p = !opt_set_p;
3663 target_flags |= mask;
3665 target_flags &= ~mask;
3668 else if (type == ix86_opt_str)
/* Each string option may only be given once.  */
3672 error ("option(\"%s\") was already specified", opt_string);
3676 p_strings[opt] = xstrdup (p + opt_len);
3686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3689 ix86_valid_target_attribute_tree (tree args)
/* Save the global option strings so they can be put back after
   override_options has run with the attribute's values.  */
3691 const char *orig_arch_string = ix86_arch_string;
3692 const char *orig_tune_string = ix86_tune_string;
3693 const char *orig_fpmath_string = ix86_fpmath_string;
3694 int orig_tune_defaulted = ix86_tune_defaulted;
3695 int orig_arch_specified = ix86_arch_specified;
3696 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3699 struct cl_target_option *def
3700 = TREE_TARGET_OPTION (target_option_default_node);
3702 /* Process each of the options on the chain.  */
3703 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3706 /* If the changed options are different from the default, rerun override_options,
3707    and then save the options away.  The string options are attribute options,
3708    and will be undone when we copy the save structure.  */
3709 if (ix86_isa_flags != def->ix86_isa_flags
3710 || target_flags != def->target_flags
3711 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]
3715 /* If we are using the default tune= or arch=, undo the string assigned,
3716    and use the default.  */
3717 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3718 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3719 else if (!orig_arch_specified)
3720 ix86_arch_string = NULL;
3722 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3723 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3724 else if (orig_tune_defaulted)
3725 ix86_tune_string = NULL;
3727 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3728 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3729 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3730 else if (!TARGET_64BIT && TARGET_SSE)
3731 ix86_fpmath_string = "sse,387";
3733 /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3734 override_options (false);
3736 /* Add any builtin functions with the new isa if any.  */
3737 ix86_add_new_builtins (ix86_isa_flags);
3739 /* Save the current options.  */
3741 t = build_target_option_node ();
/* Put the original global strings back.  */
3743 ix86_arch_string = orig_arch_string;
3744 ix86_tune_string = orig_tune_string;
3745 ix86_fpmath_string = orig_fpmath_string;
3747 /* Free up memory allocated to hold the strings.  */
3748 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3749 if (option_strings[i])
3750 free (option_strings[i]);
3756 /* Hook to validate attribute((target("string"))).  Builds the
   target-option and optimization nodes for FNDECL and attaches them;
   the caller's option state is saved and restored around the check.  */
3759 ix86_valid_target_attribute_p (tree fndecl,
3760 tree ARG_UNUSED (name),
3762 int ARG_UNUSED (flags))
3764 struct cl_target_option cur_target;
3766 tree old_optimize = build_optimization_node ();
3767 tree new_target, new_optimize;
3768 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3770 /* If the function changed the optimization levels as well as setting target
3771    options, start with the optimizations specified.  */
3772 if (func_optimize && func_optimize != old_optimize)
3773 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3775 /* The target attributes may also change some optimization flags, so update
3776    the optimization options if necessary.  */
3777 cl_target_option_save (&cur_target);
3778 new_target = ix86_valid_target_attribute_tree (args);
3779 new_optimize = build_optimization_node ();
3786 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3788 if (old_optimize != new_optimize)
3789 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made while validating.  */
3792 cl_target_option_restore (&cur_target);
3794 if (old_optimize != new_optimize)
3795 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3801 /* Hook to determine if one function can safely inline another.  */
3804 ix86_can_inline_p (tree caller, tree callee)
3807 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3808 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3810 /* If callee has no option attributes, then it is ok to inline.  */
3814 /* If caller has no option attributes, but callee does, then it is
   not ok to inline.  */
3816 else if (!caller_tree)
3821 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3822 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3824 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3825    can inline a SSE2 function but a SSE2 function can't inline a SSE5
3827 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3828 != callee_opts->ix86_isa_flags)
3831 /* See if we have the same non-isa options.  */
3832 else if (caller_opts->target_flags != callee_opts->target_flags)
3835 /* See if arch, tune, etc. are the same.  */
3836 else if (caller_opts->arch != callee_opts->arch)
3839 else if (caller_opts->tune != callee_opts->tune)
3842 else if (caller_opts->fpmath != callee_opts->fpmath)
3845 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3856 /* Remember the last target of ix86_set_current_function.  */
3857 static GTY(()) tree ix86_previous_fndecl;
3859 /* Establish appropriate back-end context for processing the function
3860    FNDECL.  The argument might be NULL to indicate processing at top
3861    level, outside of any function scope.  */
3863 ix86_set_current_function (tree fndecl)
3865 /* Only change the context if the function changes.  This hook is called
3866    several times in the course of compiling a function, and we don't want to
3867    slow things down too much or call target_reinit when it isn't safe.  */
3868 if (fndecl && fndecl != ix86_previous_fndecl)
3870 tree old_tree = (ix86_previous_fndecl
3871 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3874 tree new_tree = (fndecl
3875 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3878 ix86_previous_fndecl = fndecl;
/* Same option node as last time: nothing to switch.  */
3879 if (old_tree == new_tree)
/* Switch the globals to the new function's target options.  */
3884 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3890 struct cl_target_option *def
3891 = TREE_TARGET_OPTION (target_option_current_node);
3893 cl_target_option_restore (def);
3900 /* Return true if EXP goes in large data/bss (medium code model only;
   used by the section-selection routines below).  */
3903 ix86_in_large_data_p (tree exp)
/* Large data sections only exist for the medium code models.  */
3905 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3908 /* Functions are never large data.  */
3909 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit .ldata/.lbss section attribute forces large data.  */
3912 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3914 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3915 if (strcmp (section, ".ldata") == 0
3916 || strcmp (section, ".lbss") == 0)
3922 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3924 /* If this is an incomplete type with size 0, then we can't put it
3925    in data because it might be too big when completed.  */
3926 if (!size || size > ix86_section_threshold)
3933 /* Switch to the appropriate section for output of DECL.
3934    DECL is either a `VAR_DECL' node or a constant of some sort.
3935    RELOC indicates whether forming the initial value of DECL requires
3936    link-time relocations.  */
3938 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3942 x86_64_elf_select_section (tree decl, int reloc,
3943 unsigned HOST_WIDE_INT align)
/* Medium-model large data goes into the .l* counterparts of the
   normal data sections; everything else takes the default path.  */
3945 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3946 && ix86_in_large_data_p (decl))
3948 const char *sname = NULL;
3949 unsigned int flags = SECTION_WRITE;
3950 switch (categorize_decl_for_section (decl, reloc))
3955 case SECCAT_DATA_REL:
3956 sname = ".ldata.rel";
3958 case SECCAT_DATA_REL_LOCAL:
3959 sname = ".ldata.rel.local";
3961 case SECCAT_DATA_REL_RO:
3962 sname = ".ldata.rel.ro";
3964 case SECCAT_DATA_REL_RO_LOCAL:
3965 sname = ".ldata.rel.ro.local";
3969 flags |= SECTION_BSS;
3972 case SECCAT_RODATA_MERGE_STR:
3973 case SECCAT_RODATA_MERGE_STR_INIT:
3974 case SECCAT_RODATA_MERGE_CONST:
3978 case SECCAT_SRODATA:
3985 /* We don't split these for medium model.  Place them into
3986    default sections and hope for best.  */
3988 case SECCAT_EMUTLS_VAR:
3989 case SECCAT_EMUTLS_TMPL:
3994 /* We might get called with string constants, but get_named_section
3995    doesn't like them as they are not DECLs.  Also, we need to set
3996    flags in that case.  */
3998 return get_section (sname, flags, NULL);
3999 return get_named_section (decl, sname, reloc);
4002 return default_elf_select_section (decl, reloc, align);
4005 /* Build up a unique section name, expressed as a
4006    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4007    RELOC indicates whether the initial value of EXP requires
4008    link-time relocations.  */
4010 static void ATTRIBUTE_UNUSED
4011 x86_64_elf_unique_section (tree decl, int reloc)
/* Medium-model large data gets .l*-prefixed unique section names;
   everything else takes the default path at the bottom.  */
4013 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4014 && ix86_in_large_data_p (decl))
4016 const char *prefix = NULL;
4017 /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
4018 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4020 switch (categorize_decl_for_section (decl, reloc))
4023 case SECCAT_DATA_REL:
4024 case SECCAT_DATA_REL_LOCAL:
4025 case SECCAT_DATA_REL_RO:
4026 case SECCAT_DATA_REL_RO_LOCAL:
4027 prefix = one_only ? ".ld" : ".ldata";
4030 prefix = one_only ? ".lb" : ".lbss";
4033 case SECCAT_RODATA_MERGE_STR:
4034 case SECCAT_RODATA_MERGE_STR_INIT:
4035 case SECCAT_RODATA_MERGE_CONST:
4036 prefix = one_only ? ".lr" : ".lrodata";
4038 case SECCAT_SRODATA:
4045 /* We don't split these for medium model.  Place them into
4046    default sections and hope for best.  */
4048 case SECCAT_EMUTLS_VAR:
4049 prefix = targetm.emutls.var_section;
4051 case SECCAT_EMUTLS_TMPL:
4052 prefix = targetm.emutls.tmpl_section;
4057 const char *name, *linkonce;
4060 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4061 name = targetm.strip_name_encoding (name);
4063 /* If we're using one_only, then there needs to be a .gnu.linkonce
4064    prefix to the section name.  */
4065 linkonce = one_only ? ".gnu.linkonce" : "";
4067 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4069 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4073 default_unique_section (decl, reloc);
4076 #ifdef COMMON_ASM_OP
4077 /* This says how to output assembler code to declare an
4078 uninitialized external linkage data object.
4080 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common symbol: .largecomm for medium-model objects larger than
   the -mlarge-data-threshold (ix86_section_threshold), COMMON_ASM_OP
   otherwise, followed by "name,size,alignment-in-bytes".  */
4083 x86_elf_aligned_common (FILE *file,
4084 const char *name, unsigned HOST_WIDE_INT size,
4087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4088 && size > (unsigned int)ix86_section_threshold)
4089 fprintf (file, ".largecomm\t");
4091 fprintf (file, "%s", COMMON_ASM_OP);
4092 assemble_name (file, name);
/* ALIGN is in bits; the directive wants bytes.  */
4093 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4094 size, align / BITS_PER_UNIT);
4098 /* Utility function for targets to use in implementing
4099 ASM_OUTPUT_ALIGNED_BSS. */
/* Place large medium-model BSS objects in .lbss, small ones in .bss,
   then emit alignment, the object label, and SIZE bytes of space
   (at least 1 so the label refers to real storage).  */
4102 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4103 const char *name, unsigned HOST_WIDE_INT size,
4106 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4107 && size > (unsigned int)ix86_section_threshold)
4108 switch_to_section (get_named_section (decl, ".lbss", 0));
4110 switch_to_section (bss_section);
4111 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4112 #ifdef ASM_DECLARE_OBJECT_NAME
4113 last_assemble_variable_decl = decl;
4114 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4116 /* Standard thing is just output label for the object. */
4117 ASM_OUTPUT_LABEL (file, name);
4118 #endif /* ASM_DECLARE_OBJECT_NAME */
4119 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-dependent optimization defaults for the given -O LEVEL.
   Values set to 2 here are "unset" markers resolved later in
   override_options once TARGET_64BIT is known.  */
4123 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4125 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4126 make the problem with not enough registers even worse. */
4127 #ifdef INSN_SCHEDULING
4129 flag_schedule_insns = 0;
4133 /* The Darwin libraries never set errno, so we might as well
4134 avoid calling them when that's the only reason we would. */
4135 flag_errno_math = 0;
4137 /* The default values of these switches depend on the TARGET_64BIT
4138 that is not known at this moment. Mark these values with 2 and
4139 let the user override these. In case there is no command line option
4140 specifying them, we will set the defaults in override_options. */
4142 flag_omit_frame_pointer = 2;
4143 flag_pcc_struct_return = 2;
4144 flag_asynchronous_unwind_tables = 2;
4145 flag_vect_cost_model = 1;
4146 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4147 SUBTARGET_OPTIMIZATION_OPTIONS;
4151 /* Decide whether we can make a sibling call to a function. DECL is the
4152 declaration of the function being targeted by the call and EXP is the
4153 CALL_EXPR representing the call. */
4156 ix86_function_ok_for_sibcall (tree decl, tree exp)
4161 /* If we are generating position-independent code, we cannot sibcall
4162 optimize any indirect call, or a direct call to a global function,
4163 as the PLT requires %ebx be live. */
4164 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's FUNCTION_TYPE from the CALL_EXPR's function
   expression (stripping the pointer type if present).  */
4171 func = TREE_TYPE (CALL_EXPR_FN (exp));
4172 if (POINTER_TYPE_P (func))
4173 func = TREE_TYPE (func);
4176 /* Check that the return value locations are the same. Like
4177 if we are returning floats on the 80387 register stack, we cannot
4178 make a sibcall from a function that doesn't return a float to a
4179 function that does or, conversely, from a function that does return
4180 a float to a function that doesn't; the necessary stack adjustment
4181 would not be executed. This is also the place we notice
4182 differences in the return value ABI. Note that it is ok for one
4183 of the functions to have void return type as long as the return
4184 value of the other is passed in a register. */
4185 a = ix86_function_value (TREE_TYPE (exp), func, false);
4186 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4188 if (STACK_REG_P (a) || STACK_REG_P (b))
4190 if (!rtx_equal_p (a, b))
4193 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4195 else if (!rtx_equal_p (a, b))
4198 /* If this call is indirect, we'll need to be able to use a call-clobbered
4199 register for the address of the target function. Make sure that all
4200 such registers are not used for passing parameters. */
4201 if (!decl && !TARGET_64BIT)
4205 /* We're looking at the CALL_EXPR, we need the type of the function. */
4206 type = CALL_EXPR_FN (exp); /* pointer expression */
4207 type = TREE_TYPE (type); /* pointer type */
4208 type = TREE_TYPE (type); /* function type */
4210 if (ix86_function_regparm (type, NULL) >= 3)
4212 /* ??? Need to count the actual number of registers to be used,
4213 not the possible number of registers. Fix later. */
4218 /* Dllimport'd functions are also called indirectly. */
4219 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4221 && decl && DECL_DLLIMPORT_P (decl)
4222 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4225 /* If we need to align the outgoing stack, then sibcalling would
4226 unalign the stack, which may break the called function. */
4227 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4230 /* Otherwise okay. That also includes certain types of indirect calls. */
4234 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4235 calling convention attributes;
4236 arguments as in struct attribute_spec.handler. */
4239 ix86_handle_cconv_attribute (tree *node, tree name,
4241 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not a function/method type
   (or a decl of one); diagnose mutually exclusive combinations below.  */
4244 if (TREE_CODE (*node) != FUNCTION_TYPE
4245 && TREE_CODE (*node) != METHOD_TYPE
4246 && TREE_CODE (*node) != FIELD_DECL
4247 && TREE_CODE (*node) != TYPE_DECL)
4249 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4250 IDENTIFIER_POINTER (name));
4251 *no_add_attrs = true;
4255 /* Can combine regparm with all attributes but fastcall. */
4256 if (is_attribute_p ("regparm", name))
4260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4262 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: must be an integer constant
   no larger than REGPARM_MAX.  */
4265 cst = TREE_VALUE (args);
4266 if (TREE_CODE (cst) != INTEGER_CST)
4268 warning (OPT_Wattributes,
4269 "%qs attribute requires an integer constant argument",
4270 IDENTIFIER_POINTER (name));
4271 *no_add_attrs = true;
4273 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4275 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4276 IDENTIFIER_POINTER (name), REGPARM_MAX);
4277 *no_add_attrs = true;
4285 /* Do not warn when emulating the MS ABI. */
4286 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4287 warning (OPT_Wattributes, "%qs attribute ignored",
4288 IDENTIFIER_POINTER (name));
4289 *no_add_attrs = true;
4293 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4294 if (is_attribute_p ("fastcall", name))
4296 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298 error ("fastcall and cdecl attributes are not compatible");
4300 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4302 error ("fastcall and stdcall attributes are not compatible");
4304 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4306 error ("fastcall and regparm attributes are not compatible");
4310 /* Can combine stdcall with fastcall (redundant), regparm and
4312 else if (is_attribute_p ("stdcall", name))
4314 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4316 error ("stdcall and cdecl attributes are not compatible");
4318 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4320 error ("stdcall and fastcall attributes are not compatible");
4324 /* Can combine cdecl with regparm and sseregparm. */
4325 else if (is_attribute_p ("cdecl", name))
4327 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4329 error ("stdcall and cdecl attributes are not compatible");
4331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4333 error ("fastcall and cdecl attributes are not compatible");
4337 /* Can combine sseregparm with all attributes. */
4342 /* Return 0 if the attributes for two types are incompatible, 1 if they
4343 are compatible, and 2 if they are nearly compatible (which causes a
4344 warning to be generated). */
4347 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4349 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute that
   deviates from the default is "cdecl"; otherwise it is "stdcall".  */
4350 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4352 if (TREE_CODE (type1) != FUNCTION_TYPE
4353 && TREE_CODE (type1) != METHOD_TYPE)
4356 /* Check for mismatched fastcall/regparm types. */
4357 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4358 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4359 || (ix86_function_regparm (type1, NULL)
4360 != ix86_function_regparm (type2, NULL)))
4363 /* Check for mismatched sseregparm types. */
4364 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4365 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4368 /* Check for mismatched return types (cdecl vs stdcall). */
4369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4376 /* Return the regparm value for a function with the indicated TYPE and DECL.
4377 DECL may be NULL when calling function indirectly
4378 or considering a libcall. */
4381 ix86_function_regparm (const_tree type, const_tree decl)
/* Remembers whether the nested-function regparm(3) error was already
   emitted, to avoid duplicate diagnostics.  */
4386 static bool error_issued;
4389 return (ix86_function_type_abi (type) == SYSV_ABI
4390 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
/* Start from the -mregparm default, then honor an explicit
   regparm attribute on the type.  */
4392 regparm = ix86_regparm;
4393 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4397 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4399 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4401 /* We can't use regparm(3) for nested functions because
4402 these pass static chain pointer in %ecx register. */
4403 if (!error_issued && regparm == 3
4404 && decl_function_context (decl)
4405 && !DECL_NO_STATIC_CHAIN (decl))
4407 error ("nested functions are limited to 2 register parameters");
4408 error_issued = true;
4416 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4419 /* Use register calling convention for local functions when possible. */
4421 && TREE_CODE (decl) == FUNCTION_DECL
4425 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4426 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4429 int local_regparm, globals = 0, regno;
4432 /* Make sure no regparm register is taken by a
4433 fixed register variable. */
4434 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4435 if (fixed_regs[local_regparm])
4438 /* We can't use regparm(3) for nested functions as these use
4439 static chain pointer in third argument. */
4440 if (local_regparm == 3
4441 && decl_function_context (decl)
4442 && !DECL_NO_STATIC_CHAIN (decl))
4445 /* If the function realigns its stackpointer, the prologue will
4446 clobber %ecx. If we've already generated code for the callee,
4447 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4448 scanning the attributes for the self-realigning property. */
4449 f = DECL_STRUCT_FUNCTION (decl);
4450 /* Since current internal arg pointer won't conflict with
4451 parameter passing regs, so no need to change stack
4452 realignment and adjust regparm number.
4454 Each fixed register usage increases register pressure,
4455 so less registers should be used for argument passing.
4456 This functionality can be overriden by an explicit
4458 for (regno = 0; regno <= DI_REG; regno++)
4459 if (fixed_regs[regno])
4463 = globals < local_regparm ? local_regparm - globals : 0;
4465 if (local_regparm > regparm)
4466 regparm = local_regparm;
4473 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4474 DFmode (2) arguments in SSE registers for a function with the
4475 indicated TYPE and DECL. DECL may be NULL when calling function
4476 indirectly or considering a libcall. Otherwise return 0. */
4479 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This helper is 32-bit only; the 64-bit ABI has its own FP-passing
   rules handled elsewhere.  */
4481 gcc_assert (!TARGET_64BIT);
4483 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4484 by the sseregparm attribute. */
4485 if (TARGET_SSEREGPARM
4486 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (when WARN is set,
   the diagnostic names the decl or type).  */
4493 error ("Calling %qD with attribute sseregparm without "
4494 "SSE/SSE2 enabled", decl);
4496 error ("Calling %qT with attribute sseregparm without "
4497 "SSE/SSE2 enabled", type);
4505 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4506 (and DFmode for SSE2) arguments in SSE registers. */
4507 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4509 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4510 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4512 return TARGET_SSE2 ? 2 : 1;
4518 /* Return true if EAX is live at the start of the function. Used by
4519 ix86_expand_prologue to determine if we need special help before
4520 calling allocate_stack_worker. */
4523 ix86_eax_live_at_start_p (void)
4525 /* Cheat. Don't bother working forward from ix86_function_regparm
4526 to the function type to whether an actual argument is located in
4527 eax. Instead just look at cfg info, which is still close enough
4528 to correct at this point. This gives false positives for broken
4529 functions that might use uninitialized data that happens to be
4530 allocated in eax, but who cares? */
/* Hard register 0 is eax on this target.  */
4531 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4534 /* Value is the number of bytes of arguments automatically
4535 popped when returning from a subroutine call.
4536 FUNDECL is the declaration node of the function (as a tree),
4537 FUNTYPE is the data type of the function (as a tree),
4538 or for a library call it is an identifier node for the subroutine name.
4539 SIZE is the number of bytes of arguments passed on the stack.
4541 On the 80386, the RTD insn may be used to pop them if the number
4542 of args is fixed, but if the number is variable then the caller
4543 must pop them all. RTD can't be used for library calls now
4544 because the library is compiled with the Unix compiler.
4545 Use of RTD is a selectable option, since it is incompatible with
4546 standard Unix calling sequences. If the option is not selected,
4547 the caller must always pop the args.
4549 The attribute stdcall is equivalent to RTD on a per module basis. */
4552 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4556 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, never to libcalls (which are
   identified by an IDENTIFIER_NODE fundecl).  */
4560 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4562 /* Cdecl functions override -mrtd, and never pop the stack. */
4563 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4565 /* Stdcall and fastcall functions will pop the stack if not
4567 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4568 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A variadic function must leave popping to the caller.  */
4571 if (rtd && ! stdarg_p (funtype))
4575 /* Lose any fake structure return argument if it is passed on the stack. */
4576 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4577 && !KEEP_AGGREGATE_RETURN_POINTER)
4579 int nregs = ix86_function_regparm (funtype, fundecl);
4581 return GET_MODE_SIZE (Pmode);
4587 /* Argument support functions. */
4589 /* Return true when register may be used to pass function parameters. */
4591 ix86_function_arg_regno_p (int regno)
4594 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE argument
   registers when the corresponding ISA is enabled.  */
4599 return (regno < REGPARM_MAX
4600 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4602 return (regno < REGPARM_MAX
4603 || (TARGET_MMX && MMX_REGNO_P (regno)
4604 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4605 || (TARGET_SSE && SSE_REGNO_P (regno)
4606 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4611 if (SSE_REGNO_P (regno) && TARGET_SSE)
4616 if (TARGET_SSE && SSE_REGNO_P (regno)
4617 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4621 /* TODO: The function should depend on current function ABI but
4622 builtins.c would need updating then. Therefore we use the
4625 /* RAX is used as hidden argument to va_arg functions. */
4626 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter registers of the default ABI.  */
4629 if (ix86_abi == MS_ABI)
4630 parm_regs = x86_64_ms_abi_int_parameter_registers;
4632 parm_regs = x86_64_int_parameter_registers;
4633 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4634 : X86_64_REGPARM_MAX); i++)
4635 if (regno == parm_regs[i])
4640 /* Return if we do not know how to pass TYPE solely in registers. */
4643 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* First defer to the generic variable-size/padding test.  */
4645 if (must_pass_in_stack_var_size_or_pad (mode, type))
4648 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4649 The layout_type routine is crafty and tries to trick us into passing
4650 currently unsupported vector types on the stack by using TImode. */
4651 return (!TARGET_64BIT && mode == TImode
4652 && type && TREE_CODE (type) != VECTOR_TYPE);
4655 /* It returns the size, in bytes, of the area reserved for arguments passed
4656 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or a function type; pick the matching
   ABI query.  MS_ABI reserves a register-parameter shadow area.  */
4659 ix86_reg_parm_stack_space (const_tree fndecl)
4661 enum calling_abi call_abi = SYSV_ABI;
4662 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4663 call_abi = ix86_function_abi (fndecl);
4665 call_abi = ix86_function_type_abi (fndecl);
4666 if (call_abi == MS_ABI)
4671 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The "ms_abi"/"sysv_abi" attributes override the compilation default
   (ix86_abi); on 32-bit, or with no type, the default is returned.  */
4674 ix86_function_type_abi (const_tree fntype)
4676 if (TARGET_64BIT && fntype != NULL)
4678 enum calling_abi abi = ix86_abi;
4679 if (abi == SYSV_ABI)
4681 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4684 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function declaration FNDECL, by way of
   its type.  */
4691 static enum calling_abi
4692 ix86_function_abi (const_tree fndecl)
4696 return ix86_function_type_abi (TREE_TYPE (fndecl));
4699 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4702 ix86_cfun_abi (void)
/* Outside a function context, or on 32-bit, fall back (to ix86_abi,
   presumably -- the fallback line is not visible in this chunk).  */
4704 if (! cfun || ! TARGET_64BIT)
4706 return cfun->machine->call_abi;
4710 extern void init_regs (void);
4712 /* Implementation of call abi switching target hook. Specific to FNDECL
4713 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4714 for more details. */
4716 ix86_call_abi_override (const_tree fndecl)
/* NULL means a libcall or unknown callee: use the compilation default.  */
4718 if (fndecl == NULL_TREE)
4719 cfun->machine->call_abi = ix86_abi;
4721 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4724 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4725 re-initialization of init_regs each time we switch function context since
4726 this is needed only during RTL expansion. */
4728 ix86_maybe_switch_abi (void)
/* %esi is call-used under MS_ABI but not SYSV; use it as a cheap probe
   for whether the register tables already match the current ABI.  */
4731 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4735 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4736 for a call to a function whose data type is FNTYPE.
4737 For a library call, FNTYPE is 0. */
4740 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4741 tree fntype, /* tree ptr for function decl */
4742 rtx libname, /* SYMBOL_REF of library name or 0 */
4745 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4746 memset (cum, 0, sizeof (*cum));
/* Record the callee's ABI, preferring the decl when available.  */
4749 cum->call_abi = ix86_function_abi (fndecl);
4751 cum->call_abi = ix86_function_type_abi (fntype);
4752 /* Set up the number of registers to use for passing arguments. */
4754 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4755 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4756 cum->nregs = ix86_regparm;
/* When the callee's ABI differs from the compilation default, the
   register counts come from the other ABI's limits.  */
4759 if (cum->call_abi != ix86_abi)
4760 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4765 cum->sse_nregs = SSE_REGPARM_MAX;
4768 if (cum->call_abi != ix86_abi)
4769 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4770 : X64_SSE_REGPARM_MAX;
4774 cum->mmx_nregs = MMX_REGPARM_MAX;
4775 cum->warn_avx = true;
4776 cum->warn_sse = true;
4777 cum->warn_mmx = true;
4779 /* Because type might mismatch in between caller and callee, we need to
4780 use actual type of function for local calls.
4781 FIXME: cgraph_analyze can be told to actually record if function uses
4782 va_start so for local functions maybe_vaarg can be made aggressive
4784 FIXME: once typesytem is fixed, we won't need this code anymore. */
4786 fntype = TREE_TYPE (fndecl);
4787 cum->maybe_vaarg = (fntype
4788 ? (!prototype_p (fntype) || stdarg_p (fntype))
4793 /* If there are variable arguments, then we won't pass anything
4794 in registers in 32-bit mode. */
4795 if (stdarg_p (fntype))
4806 /* Use ecx and edx registers if function has fastcall attribute,
4807 else look for regparm information. */
4810 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4816 cum->nregs = ix86_function_regparm (fntype, fndecl);
4819 /* Set up the number of SSE registers used for passing SFmode
4820 and DFmode arguments. Warn for mismatching ABI. */
4821 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4825 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4826 But in the case of vector types, it is some vector mode.
4828 When we have only some of our vector isa extensions enabled, then there
4829 are some modes for which vector_mode_supported_p is false. For these
4830 modes, the generic vector support in gcc will choose some non-vector mode
4831 in order to implement the type. By computing the natural mode, we'll
4832 select the proper ABI location for the operand and not depend on whatever
4833 the middle-end decides to do with these vector types.
4835 The midde-end can't deal with the vector types > 16 bytes. In this
4836 case, we return the original mode and warn ABI change if CUM isn't
4839 static enum machine_mode
4840 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4842 enum machine_mode mode = TYPE_MODE (type);
4844 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4846 HOST_WIDE_INT size = int_size_in_bytes (type);
4847 if ((size == 8 || size == 16 || size == 32)
4848 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4849 && TYPE_VECTOR_SUBPARTS (type) > 1)
4851 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start from the smallest vector mode of the matching class and
   search upward for one with the right element mode and count.  */
4853 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4854 mode = MIN_MODE_VECTOR_FLOAT;
4856 mode = MIN_MODE_VECTOR_INT;
4858 /* Get the mode which has this inner mode and number of units. */
4859 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4860 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4861 && GET_MODE_INNER (mode) == innermode)
4863 if (size == 32 && !TARGET_AVX)
/* Warn once per compilation about the 4.4 AVX-argument ABI change.  */
4865 static bool warnedavx;
4872 warning (0, "AVX vector argument without AVX "
4873 "enabled changes the ABI");
4875 return TYPE_MODE (type);
4888 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4889 this may not agree with the mode that the type system has chosen for the
4890 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4891 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4894 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4899 if (orig_mode != BLKmode)
4900 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-entry PARALLEL at offset 0.  */
4903 tmp = gen_rtx_REG (mode, regno);
4904 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4905 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4911 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4912 of this code is to classify each 8bytes of incoming argument by the register
4913 class and assign registers accordingly. */
4915 /* Return the union class of CLASS1 and CLASS2.
4916 See the x86-64 PS ABI for details. */
4918 static enum x86_64_reg_class
4919 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4921 /* Rule #1: If both classes are equal, this is the resulting class. */
4922 if (class1 == class2)
4925 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4927 if (class1 == X86_64_NO_CLASS)
4929 if (class2 == X86_64_NO_CLASS)
4932 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4933 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4934 return X86_64_MEMORY_CLASS;
4936 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays INTEGERSI (both halves fit in
   32 bits); any other INTEGER combination widens to INTEGER.  */
4937 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4938 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4939 return X86_64_INTEGERSI_CLASS;
4940 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4941 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4942 return X86_64_INTEGER_CLASS;
4944 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4946 if (class1 == X86_64_X87_CLASS
4947 || class1 == X86_64_X87UP_CLASS
4948 || class1 == X86_64_COMPLEX_X87_CLASS
4949 || class2 == X86_64_X87_CLASS
4950 || class2 == X86_64_X87UP_CLASS
4951 || class2 == X86_64_COMPLEX_X87_CLASS)
4952 return X86_64_MEMORY_CLASS;
4954 /* Rule #6: Otherwise class SSE is used. */
4955 return X86_64_SSE_CLASS;
4958 /* Classify the argument of type TYPE and mode MODE.
4959 CLASSES will be filled by the register class used to pass each word
4960 of the operand. The number of words is returned. In case the parameter
4961 should be passed in memory, 0 is returned. As a special case for zero
4962 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4964 BIT_OFFSET is used internally for handling records and specifies offset
4965 of the offset in bits modulo 256 to avoid overflow cases.
4967 See the x86-64 PS ABI for details.
4971 classify_argument (enum machine_mode mode, const_tree type,
4972 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4974 HOST_WIDE_INT bytes =
4975 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4976 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4978 /* Variable sized entities are always passed/returned in memory. */
4982 if (mode != VOIDmode
4983 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates are classified recursively, field by field.  */
4986 if (type && AGGREGATE_TYPE_P (type))
4990 enum x86_64_reg_class subclasses[MAX_CLASSES];
4992 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4996 for (i = 0; i < words; i++)
4997 classes[i] = X86_64_NO_CLASS;
4999 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5000 signalize memory class, so handle it as special case. */
5003 classes[0] = X86_64_NO_CLASS;
5007 /* Classify each field of record and merge classes. */
5008 switch (TREE_CODE (type))
5011 /* And now merge the fields of structure. */
5012 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5014 if (TREE_CODE (field) == FIELD_DECL)
5018 if (TREE_TYPE (field) == error_mark_node)
5021 /* Bitfields are always classified as integer. Handle them
5022 early, since later code would consider them to be
5023 misaligned integers. */
5024 if (DECL_BIT_FIELD (field))
5026 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5027 i < ((int_bit_position (field) + (bit_offset % 64))
5028 + tree_low_cst (DECL_SIZE (field), 0)
5031 merge_classes (X86_64_INTEGER_CLASS,
5038 type = TREE_TYPE (field);
5040 /* Flexible array member is ignored. */
5041 if (TYPE_MODE (type) == BLKmode
5042 && TREE_CODE (type) == ARRAY_TYPE
5043 && TYPE_SIZE (type) == NULL_TREE
5044 && TYPE_DOMAIN (type) != NULL_TREE
5045 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5050 if (!warned && warn_psabi)
5053 inform (input_location,
5054 "The ABI of passing struct with"
5055 " a flexible array member has"
5056 " changed in GCC 4.4");
/* Classify the field recursively and merge its word classes into
   the containing aggregate at the field's word position.  */
5060 num = classify_argument (TYPE_MODE (type), type,
5062 (int_bit_position (field)
5063 + bit_offset) % 256);
5066 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5067 for (i = 0; i < num && (i + pos) < words; i++)
5069 merge_classes (subclasses[i], classes[i + pos]);
5076 /* Arrays are handled as small records. */
5079 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5080 TREE_TYPE (type), subclasses, bit_offset);
5084 /* The partial classes are now full classes. */
5085 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5086 subclasses[0] = X86_64_SSE_CLASS;
5087 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5088 && !((bit_offset % 64) == 0 && bytes == 4))
5089 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words.  */
5091 for (i = 0; i < words; i++)
5092 classes[i] = subclasses[i % num];
5097 case QUAL_UNION_TYPE:
5098 /* Unions are similar to RECORD_TYPE but offset is always 0.
5100 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5102 if (TREE_CODE (field) == FIELD_DECL)
5106 if (TREE_TYPE (field) == error_mark_node)
5109 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5110 TREE_TYPE (field), subclasses,
5114 for (i = 0; i < num; i++)
5115 classes[i] = merge_classes (subclasses[i], classes[i]);
5126 /* When size > 16 bytes, if the first one isn't
5127 X86_64_SSE_CLASS or any other ones aren't
5128 X86_64_SSEUP_CLASS, everything should be passed in
5130 if (classes[0] != X86_64_SSE_CLASS)
5133 for (i = 1; i < words; i++)
5134 if (classes[i] != X86_64_SSEUP_CLASS)
5138 /* Final merger cleanup. */
5139 for (i = 0; i < words; i++)
5141 /* If one class is MEMORY, everything should be passed in
5143 if (classes[i] == X86_64_MEMORY_CLASS)
5146 /* The X86_64_SSEUP_CLASS should be always preceded by
5147 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5148 if (classes[i] == X86_64_SSEUP_CLASS
5149 && classes[i - 1] != X86_64_SSE_CLASS
5150 && classes[i - 1] != X86_64_SSEUP_CLASS)
5152 /* The first one should never be X86_64_SSEUP_CLASS. */
5153 gcc_assert (i != 0);
5154 classes[i] = X86_64_SSE_CLASS;
5157 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5158 everything should be passed in memory. */
5159 if (classes[i] == X86_64_X87UP_CLASS
5160 && (classes[i - 1] != X86_64_X87_CLASS))
5164 /* The first one should never be X86_64_X87UP_CLASS. */
5165 gcc_assert (i != 0);
5166 if (!warned && warn_psabi)
5169 inform (input_location,
5170 "The ABI of passing union with long double"
5171 " has changed in GCC 4.4");
5179 /* Compute alignment needed. We align all types to natural boundaries with
5180 exception of XFmode that is aligned to 64bits. */
5181 if (mode != VOIDmode && mode != BLKmode)
5183 int mode_alignment = GET_MODE_BITSIZE (mode);
5186 mode_alignment = 128;
5187 else if (mode == XCmode)
5188 mode_alignment = 256;
5189 if (COMPLEX_MODE_P (mode))
5190 mode_alignment /= 2;
5191 /* Misaligned fields are always returned in memory. */
5192 if (bit_offset % mode_alignment)
5196 /* for V1xx modes, just use the base mode */
5197 if (VECTOR_MODE_P (mode) && mode != V1DImode
5198 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5199 mode = GET_MODE_INNER (mode);
5201 /* Classification of atomic types. */
5206 classes[0] = X86_64_SSE_CLASS;
5209 classes[0] = X86_64_SSE_CLASS;
5210 classes[1] = X86_64_SSEUP_CLASS;
/* Integral modes: classify by total bit size at this offset --
   <=32 bits is INTEGERSI, up to two 64-bit INTEGER words otherwise.  */
5220 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5224 classes[0] = X86_64_INTEGERSI_CLASS;
5227 else if (size <= 64)
5229 classes[0] = X86_64_INTEGER_CLASS;
5232 else if (size <= 64+32)
5234 classes[0] = X86_64_INTEGER_CLASS;
5235 classes[1] = X86_64_INTEGERSI_CLASS;
5238 else if (size <= 64+64)
5240 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5248 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5252 /* OImode shouldn't be used directly. */
5257 if (!(bit_offset % 64))
5258 classes[0] = X86_64_SSESF_CLASS;
5260 classes[0] = X86_64_SSE_CLASS;
5263 classes[0] = X86_64_SSEDF_CLASS;
5266 classes[0] = X86_64_X87_CLASS;
5267 classes[1] = X86_64_X87UP_CLASS;
5270 classes[0] = X86_64_SSE_CLASS;
5271 classes[1] = X86_64_SSEUP_CLASS;
5274 classes[0] = X86_64_SSE_CLASS;
5275 if (!(bit_offset % 64))
5281 if (!warned && warn_psabi)
5284 inform (input_location,
5285 "The ABI of passing structure with complex float"
5286 " member has changed in GCC 4.4");
5288 classes[1] = X86_64_SSESF_CLASS;
5292 classes[0] = X86_64_SSEDF_CLASS;
5293 classes[1] = X86_64_SSEDF_CLASS;
5296 classes[0] = X86_64_COMPLEX_X87_CLASS;
5299 /* This modes is larger than 16 bytes. */
/* 256-bit vector modes occupy one SSE word plus three SSEUP words.  */
5307 classes[0] = X86_64_SSE_CLASS;
5308 classes[1] = X86_64_SSEUP_CLASS;
5309 classes[2] = X86_64_SSEUP_CLASS;
5310 classes[3] = X86_64_SSEUP_CLASS;
5318 classes[0] = X86_64_SSE_CLASS;
5319 classes[1] = X86_64_SSEUP_CLASS;
5326 classes[0] = X86_64_SSE_CLASS;
5332 gcc_assert (VECTOR_MODE_P (mode));
5337 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5339 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5340 classes[0] = X86_64_INTEGERSI_CLASS;
5342 classes[0] = X86_64_INTEGER_CLASS;
5343 classes[1] = X86_64_INTEGER_CLASS;
5344 return 1 + (bytes > 8);
5348 /* Examine the argument and return set number of register required in each
5349    class.  Return 0 iff parameter should be passed in memory.  */
/* NOTE(review): this chunk is an elided extraction -- interior lines (the
   opening brace, the *int_nregs/*sse_nregs initialization and increments,
   the early "return 0" when classification fails, and the closing brace)
   are missing.  All tokens below are kept verbatim; do not compile as-is.  */
/* Walks the per-eightbyte classification produced by classify_argument
   backwards and tallies how many integer vs. SSE registers the argument
   needs.  X87/complex-x87 classes are only legal for return values
   (visible at the X86_64_COMPLEX_X87_CLASS case: "in_return ? 2 : 0").  */
5351 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5352 int *int_nregs, int *sse_nregs)
5354 enum x86_64_reg_class regclass[MAX_CLASSES];
5355 int n = classify_argument (mode, type, regclass, 0);
5361 for (n--; n >= 0; n--)
5362 switch (regclass[n])
5364 case X86_64_INTEGER_CLASS:
5365 case X86_64_INTEGERSI_CLASS:
5368 case X86_64_SSE_CLASS:
5369 case X86_64_SSESF_CLASS:
5370 case X86_64_SSEDF_CLASS:
5373 case X86_64_NO_CLASS:
5374 case X86_64_SSEUP_CLASS:
5376 case X86_64_X87_CLASS:
5377 case X86_64_X87UP_CLASS:
/* x87 values occupy the whole st(0) stack slot pair: 2 "registers" when
   returned, never passable as an argument in registers (0).  */
5381 case X86_64_COMPLEX_X87_CLASS:
5382 return in_return ? 2 : 0;
5383 case X86_64_MEMORY_CLASS:
5389 /* Construct container for the argument used by GCC interface.  See
5390    FUNCTION_ARG for the detailed description.  */
/* NOTE(review): elided extraction -- braces, several declarations
   (i, n, nexps, ret), case bodies, breaks and the final return are
   missing from this view.  Tokens below are verbatim.  */
/* Builds the RTL that describes where an argument/return value of MODE
   lives: a single REG for the simple one- and two-eightbyte cases, or a
   PARALLEL of EXPR_LISTs mapping each eightbyte to an integer or SSE
   register.  Returns NULL (implied by callers such as function_value_64)
   when the value must go in memory or is zero-sized.  */
5393 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5394 const_tree type, int in_return, int nintregs, int nsseregs,
5395 const int *intreg, int sse_regno)
5397 /* The following variables hold the static issued_error state.  */
/* Static flags so each SSE/x87-disabled diagnostic is emitted once per
   compilation, not once per offending argument.  */
5398 static bool issued_sse_arg_error;
5399 static bool issued_sse_ret_error;
5400 static bool issued_x87_ret_error;
5402 enum machine_mode tmpmode;
5404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5405 enum x86_64_reg_class regclass[MAX_CLASSES];
5409 int needed_sseregs, needed_intregs;
5410 rtx exp[MAX_CLASSES];
5413 n = classify_argument (mode, type, regclass, 0);
5416 if (!examine_argument (mode, type, in_return, &needed_intregs,
5419 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5422 /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5423    some less clueful developer tries to use floating-point anyway.  */
5424 if (needed_sseregs && !TARGET_SSE)
5428 if (!issued_sse_ret_error)
5430 error ("SSE register return with SSE disabled");
5431 issued_sse_ret_error = true;
5434 else if (!issued_sse_arg_error)
5436 error ("SSE register argument with SSE disabled");
5437 issued_sse_arg_error = true;
5442 /* Likewise, error if the ABI requires us to return values in the
5443    x87 registers and the user specified -mno-80387.  */
5444 if (!TARGET_80387 && in_return)
5445 for (i = 0; i < n; i++)
5446 if (regclass[i] == X86_64_X87_CLASS
5447 || regclass[i] == X86_64_X87UP_CLASS
5448 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5450 if (!issued_x87_ret_error)
5452 error ("x87 register return with x87 disabled");
5453 issued_x87_ret_error = true;
5458 /* First construct simple cases.  Avoid SCmode, since we want to use
5459    single register to pass this type.  */
5460 if (n == 1 && mode != SCmode)
5461 switch (regclass[0])
5463 case X86_64_INTEGER_CLASS:
5464 case X86_64_INTEGERSI_CLASS:
5465 return gen_rtx_REG (mode, intreg[0]);
5466 case X86_64_SSE_CLASS:
5467 case X86_64_SSESF_CLASS:
5468 case X86_64_SSEDF_CLASS:
5469 if (mode != BLKmode)
5470 return gen_reg_or_parallel (mode, orig_mode,
5471 SSE_REGNO (sse_regno));
5473 case X86_64_X87_CLASS:
5474 case X86_64_COMPLEX_X87_CLASS:
5475 return gen_rtx_REG (mode, FIRST_STACK_REG);
5476 case X86_64_NO_CLASS:
5477 /* Zero sized array, struct or class.  */
/* Two-eightbyte SSE value (e.g. __m128): one XMM register.  */
5482 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5483 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* Four-eightbyte SSE+3xSSEUP value (256-bit AVX) -- leading condition
   line elided by extraction; presumably "if (n == 4 ..." -- verify
   against the original source.  */
5486 && regclass[0] == X86_64_SSE_CLASS
5487 && regclass[1] == X86_64_SSEUP_CLASS
5488 && regclass[2] == X86_64_SSEUP_CLASS
5489 && regclass[3] == X86_64_SSEUP_CLASS
5491 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5494 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5495 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers can carry a 16-byte scalar whole.  */
5496 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5497 && regclass[1] == X86_64_INTEGER_CLASS
5498 && (mode == CDImode || mode == TImode || mode == TFmode)
5499 && intreg[0] + 1 == intreg[1])
5500 return gen_rtx_REG (mode, intreg[0]);
5502 /* Otherwise figure out the entries of the PARALLEL.  */
5503 for (i = 0; i < n; i++)
5507 switch (regclass[i])
5509 case X86_64_NO_CLASS:
5511 case X86_64_INTEGER_CLASS:
5512 case X86_64_INTEGERSI_CLASS:
5513 /* Merge TImodes on aligned occasions here too.  */
5514 if (i * 8 + 8 > bytes)
5515 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5516 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5520 /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
5521 if (tmpmode == BLKmode)
5523 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5524 gen_rtx_REG (tmpmode, *intreg),
5528 case X86_64_SSESF_CLASS:
5529 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5530 gen_rtx_REG (SFmode,
5531 SSE_REGNO (sse_regno)),
5535 case X86_64_SSEDF_CLASS:
5536 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5537 gen_rtx_REG (DFmode,
5538 SSE_REGNO (sse_regno)),
5542 case X86_64_SSE_CLASS:
5550 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5560 && regclass[1] == X86_64_SSEUP_CLASS
5561 && regclass[2] == X86_64_SSEUP_CLASS
5562 && regclass[3] == X86_64_SSEUP_CLASS);
5569 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5570 gen_rtx_REG (tmpmode,
5571 SSE_REGNO (sse_regno)),
5580 /* Empty aligned struct, union or class.  */
5584 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5585 for (i = 0; i < nexps; i++)
5586 XVECEXP (ret, 0, i) = exp [i];
5590 /* Update the data in CUM to advance over an argument of mode MODE
5591    and data type TYPE.  (TYPE is null for libcalls where that information
5592    may not be available.)  */
/* NOTE(review): elided extraction -- the function's return type line,
   braces, the switch-on-mode dispatch and several case labels are
   missing.  Tokens below are verbatim.  */
/* 32-bit ABI variant: bumps the word counter and, depending on the
   (elided) mode dispatch, consumes integer regparm registers, SSE
   registers (for SSE vector modes), or MMX registers (for MMX modes).
   Aggregates never go in SSE/MMX registers, hence the
   !AGGREGATE_TYPE_P guards.  */
5595 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5596 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5612 cum->words += words;
5613 cum->nregs -= words;
5614 cum->regno += words;
5616 if (cum->nregs <= 0)
5624 /* OImode shouldn't be used directly.  */
5628 if (cum->float_in_sse < 2)
5631 if (cum->float_in_sse < 1)
5648 if (!type || !AGGREGATE_TYPE_P (type))
5650 cum->sse_words += words;
5651 cum->sse_nregs -= 1;
5652 cum->sse_regno += 1;
5653 if (cum->sse_nregs <= 0)
5666 if (!type || !AGGREGATE_TYPE_P (type))
5668 cum->mmx_words += words;
5669 cum->mmx_nregs -= 1;
5670 cum->mmx_regno += 1;
5671 if (cum->mmx_nregs <= 0)
/* NOTE(review): elided extraction -- return type line, braces and the
   early return for the unnamed-AVX case are missing.  Tokens verbatim.  */
/* SysV x86-64 variant: if the argument fits in the remaining integer
   and SSE registers (per examine_argument), consume them; otherwise it
   is passed on the stack and only the word counter advances.  */
5682 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5683 tree type, HOST_WIDE_INT words, int named)
5685 int int_nregs, sse_nregs;
5687 /* Unnamed 256bit vector mode parameters are passed on stack.  */
5688 if (!named && VALID_AVX256_REG_MODE (mode))
5691 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5692 cum->words += words;
5693 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5695 cum->nregs -= int_nregs;
5696 cum->sse_nregs -= sse_nregs;
5697 cum->regno += int_nregs;
5698 cum->sse_regno += sse_nregs;
5701 cum->words += words;
/* NOTE(review): elided extraction -- return type line, braces and the
   register bookkeeping between the assert and the words update are
   missing.  Tokens verbatim.  */
/* Microsoft x64 ABI variant: only values of size 1/2/4/8 are passed by
   value (anything else should already have been forced indirect by
   ix86_pass_by_reference); each argument consumes one slot.  */
5705 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5706 HOST_WIDE_INT words)
5708 /* Otherwise, this should be passed indirect.  */
5709 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5711 cum->words += words;
/* NOTE(review): elided extraction -- return type line, braces and the
   type-null guard around type_natural_mode are missing.  Tokens
   verbatim.  */
/* ABI dispatcher for argument advance: computes the argument's size in
   bytes/words, canonicalizes vector types to their natural mode, then
   routes to the MS-x64, SysV-x86-64 or 32-bit worker.  */
5720 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5721 tree type, int named)
5723 HOST_WIDE_INT bytes, words;
5725 if (mode == BLKmode)
5726 bytes = int_size_in_bytes (type);
5728 bytes = GET_MODE_SIZE (mode);
/* Round the byte size up to whole words.  */
5729 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5732 mode = type_natural_mode (type, NULL);
5734 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5735 function_arg_advance_ms_64 (cum, bytes, words);
5736 else if (TARGET_64BIT)
5737 function_arg_advance_64 (cum, mode, type, words, named);
5739 function_arg_advance_32 (cum, mode, type, bytes, words);
5742 /* Define where to put the arguments to a function.
5743    Value is zero to push the argument on the stack,
5744    or a hard register in which to store the argument.
5746    MODE is the argument's machine mode.
5747    TYPE is the data type of the argument (as a tree).
5748    This is null for libcalls where that information may
5750    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5751    the preceding args and about the function being called.
5752    NAMED is nonzero if this argument is a named parameter
5753    (otherwise it is an extra parameter matching an ellipsis).  */
/* NOTE(review): elided extraction -- return type line, braces, the
   switch-on-mode dispatch and several case bodies are missing.  Tokens
   verbatim.  */
/* 32-bit ABI worker: places small integer arguments in regparm
   registers (with the fastcall ECX/EDX quirk), SSE vector arguments in
   XMM registers, and MMX vectors in MM registers, warning once when the
   required ISA extension is disabled.  */
5756 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5757 enum machine_mode orig_mode, tree type,
5758 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across all calls.  */
5760 static bool warnedsse, warnedmmx;
5762 /* Avoid the AL settings for the Unix64 ABI.  */
5763 if (mode == VOIDmode)
5779 if (words <= cum->nregs)
5781 int regno = cum->regno;
5783 /* Fastcall allocates the first two DWORD (SImode) or
5784    smaller arguments to ECX and EDX if it isn't an
5790 || (type && AGGREGATE_TYPE_P (type)))
5793 /* ECX not EAX is the first allocated register.  */
5794 if (regno == AX_REG)
5797 return gen_rtx_REG (mode, regno);
5802 if (cum->float_in_sse < 2)
5805 if (cum->float_in_sse < 1)
5809 /* In 32bit, we pass TImode in xmm registers.  */
5816 if (!type || !AGGREGATE_TYPE_P (type))
5818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5821 warning (0, "SSE vector argument without SSE enabled "
5825 return gen_reg_or_parallel (mode, orig_mode,
5826 cum->sse_regno + FIRST_SSE_REG);
5831 /* OImode shouldn't be used directly.  */
5840 if (!type || !AGGREGATE_TYPE_P (type))
5843 return gen_reg_or_parallel (mode, orig_mode,
5844 cum->sse_regno + FIRST_SSE_REG);
5853 if (!type || !AGGREGATE_TYPE_P (type))
5855 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5858 warning (0, "MMX vector argument without MMX enabled "
5862 return gen_reg_or_parallel (mode, orig_mode,
5863 cum->mmx_regno + FIRST_MMX_REG);
/* NOTE(review): elided extraction -- return type line, braces, parts of
   the VOIDmode ternary and the named/AVX guard are missing.  Tokens
   verbatim.  */
/* SysV x86-64 worker.  A VOIDmode "argument" is the hidden AL value
   that tells a varargs callee how many SSE registers were used; the
   upper bound depends on whether caller and callee share the same ABI.
   Otherwise delegate register assignment to construct_container.  */
5872 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5873 enum machine_mode orig_mode, tree type, int named)
5875 /* Handle a hidden AL argument containing number of registers
5876    for varargs x86-64 functions.  */
5877 if (mode == VOIDmode)
5878 return GEN_INT (cum->maybe_vaarg
5879 ? (cum->sse_nregs < 0
5880 ? (cum->call_abi == ix86_abi
5882 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5883 : X64_SSE_REGPARM_MAX))
5898 /* Unnamed 256bit vector mode parameters are passed on stack.  */
5904 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5906 &x86_64_int_parameter_registers [cum->regno],
/* NOTE(review): elided extraction -- return type line, braces, the
   "unsigned int regno" declaration and a couple of guards are missing.
   Tokens verbatim.  */
/* Microsoft x64 worker.  Each of the first four arguments has a fixed
   home register regardless of type; SF/DF floats go in XMM registers,
   and unnamed floats are mirrored in both the SSE and integer register
   (the PARALLEL below) so varargs callees can find them.  */
5911 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5912 enum machine_mode orig_mode, int named,
5913 HOST_WIDE_INT bytes)
5917 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5918    We use value of -2 to specify that current function call is MSABI.  */
5919 if (mode == VOIDmode)
5920 return GEN_INT (-2);
5922 /* If we've run out of registers, it goes on the stack.  */
5923 if (cum->nregs == 0)
5926 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5928 /* Only floating point modes are passed in anything but integer regs.  */
5929 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5932 regno = cum->regno + FIRST_SSE_REG;
5937 /* Unnamed floating parameters are passed in both the
5938    SSE and integer registers.  */
5939 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5940 t2 = gen_rtx_REG (mode, regno);
5941 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5942 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5943 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5946 /* Handle aggregated types passed in register.  */
5947 if (orig_mode == BLKmode)
5949 if (bytes > 0 && bytes <= 8)
5950 mode = (bytes > 4 ? DImode : SImode);
5951 if (mode == BLKmode)
5955 return gen_reg_or_parallel (mode, orig_mode, regno);
/* NOTE(review): elided extraction -- return type line and braces are
   missing.  Tokens verbatim.  */
/* ABI dispatcher mirroring function_arg_advance: computes size,
   canonicalizes vector types to a vector mode even when MMX/SSE are
   off, then routes to the MS-x64, SysV-x86-64 or 32-bit worker.  */
5959 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5960 tree type, int named)
5962 enum machine_mode mode = omode;
5963 HOST_WIDE_INT bytes, words;
5965 if (mode == BLKmode)
5966 bytes = int_size_in_bytes (type);
5968 bytes = GET_MODE_SIZE (mode);
5969 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5971 /* To simplify the code below, represent vector types with a vector mode
5972    even if MMX/SSE are not active.  */
5973 if (type && TREE_CODE (type) == VECTOR_TYPE)
5974 mode = type_natural_mode (type, cum);
5976 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5977 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5978 else if (TARGET_64BIT)
5979 return function_arg_64 (cum, mode, omode, type, named);
5981 return function_arg_32 (cum, mode, omode, type, bytes, words);
5984 /* A C expression that indicates when an argument must be passed by
5985    reference.  If nonzero for an argument, a copy of that argument is
5986    made in memory and a pointer to the argument is passed instead of
5987    the argument itself.  The pointer is passed in whatever way is
5988    appropriate for passing a pointer to that type.  */
/* NOTE(review): elided extraction -- return type line, braces, several
   return statements and the tail of the switch are missing.  Tokens
   verbatim.  */
/* MS x64: arrays and any aggregate whose size is not exactly
   1/2/4/8 bytes go by reference (per the Windows x64 convention);
   SysV x86-64: variably-sized types (int_size_in_bytes == -1) do.  */
5991 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5992 enum machine_mode mode ATTRIBUTE_UNUSED,
5993 const_tree type, bool named ATTRIBUTE_UNUSED)
5995 /* See Windows x64 Software Convention.  */
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5998 int msize = (int) GET_MODE_SIZE (mode);
6001 /* Arrays are passed by reference.  */
6002 if (TREE_CODE (type) == ARRAY_TYPE)
6005 if (AGGREGATE_TYPE_P (type))
6007 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6008    are passed by reference.  */
6009 msize = int_size_in_bytes (type);
6013 /* __m128 is passed by reference.  */
6015 case 1: case 2: case 4: case 8:
6021 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6027 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): elided extraction -- return type line, braces, return
   statements and several case labels are missing.  Tokens verbatim.  */
/* True when TYPE is (or recursively contains) a value that needs
   128-bit alignment: an SSE-register-mode scalar, or a struct/union/
   array field of such.  User-lowered alignment (< 128) disables it.  */
6030 contains_aligned_value_p (tree type)
6032 enum machine_mode mode = TYPE_MODE (type);
6033 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6037 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6039 if (TYPE_ALIGN (type) < 128)
6042 if (AGGREGATE_TYPE_P (type))
6044 /* Walk the aggregates recursively.  */
6045 switch (TREE_CODE (type))
6049 case QUAL_UNION_TYPE:
6053 /* Walk all the structure fields.  */
6054 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6056 if (TREE_CODE (field) == FIELD_DECL
6057 && contains_aligned_value_p (TREE_TYPE (field)))
6064 /* Just for use if some languages passes arrays by value.  */
6065 if (contains_aligned_value_p (TREE_TYPE (type)))
6076 /* Gives the alignment boundary, in bits, of an argument with the
6077    specified mode and type.  */
/* NOTE(review): elided extraction -- return type line, braces, the
   "int align" declaration, the type-null branch and the final return
   are missing.  Tokens verbatim.  */
/* Starts from the type's (canonical) alignment or the mode alignment,
   clamps it up to PARM_BOUNDARY, then -- on 32-bit targets, where the
   i386 ABI mandates 4-byte argument alignment -- drops anything above
   PARM_BOUNDARY unless the value genuinely needs 128-bit alignment
   (SSE modes / contains_aligned_value_p).  Capped at BIGGEST_ALIGNMENT.  */
6080 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6085 /* Since canonical type is used for call, we convert it to
6086    canonical type if needed.  */
6087 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6088 type = TYPE_CANONICAL (type);
6089 align = TYPE_ALIGN (type);
6092 align = GET_MODE_ALIGNMENT (mode);
6093 if (align < PARM_BOUNDARY)
6094 align = PARM_BOUNDARY;
6095 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6096    natural boundaries.  */
6097 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6099 /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
6100    make an exception for SSE modes since these require 128bit
6103    The handling here differs from field_alignment.  ICC aligns MMX
6104    arguments to 4 byte boundaries, while structure fields are aligned
6105    to 8 byte boundaries.  */
6108 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6109 align = PARM_BOUNDARY;
6113 if (!contains_aligned_value_p (type))
6114 align = PARM_BOUNDARY;
6117 if (align > BIGGEST_ALIGNMENT)
6118 align = BIGGEST_ALIGNMENT;
6122 /* Return true if N is a possible register number of function value.  */
/* NOTE(review): elided extraction -- return type line, braces, the
   switch header and other case labels (AX, SSE, MMX) are missing.
   Tokens verbatim.  */
6125 ix86_function_value_regno_p (int regno)
6132 case FIRST_FLOAT_REG:
6133 /* TODO: The function should depend on current function ABI but
6134    builtins.c would need updating then.  Therefore we use the
/* st(0) is never a return register under the MS x64 ABI.  */
6136 if (TARGET_64BIT && ix86_abi == MS_ABI)
6138 return TARGET_FLOAT_RETURNS_IN_80387;
6144 if (TARGET_MACHO || TARGET_64BIT)
6152 /* Define how to find the value returned by a function.
6153    VALTYPE is the data type of the value (as a tree).
6154    If the precise function being called is known, FUNC is its FUNCTION_DECL;
6155    otherwise, FUNC is 0.  */
/* NOTE(review): elided extraction -- return type line, braces and the
   "regno = AX_REG" default assignment are missing.  Tokens verbatim.  */
/* 32-bit ABI: picks the return register -- MM0 for 8-byte vectors,
   XMM0 for 16-byte vectors/TImode, YMM0 for 32-byte vectors, st(0) for
   x87 floats, %eax otherwise; local/sseregparm functions may override
   SF/DF returns into XMM0.  */
6158 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6159 const_tree fntype, const_tree fn)
6163 /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
6164    we normally prevent this case when mmx is not available.  However
6165    some ABIs may require the result to be returned like DImode.  */
6166 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6167 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6169 /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
6170    we prevent this case when sse is not available.  However some ABIs
6171    may require the result to be returned like integer TImode.  */
6172 else if (mode == TImode
6173 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6174 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6176 /* 32-byte vector modes in %ymm0.   */
6177 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6178 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6180 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6181 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6182 regno = FIRST_FLOAT_REG;
6184 /* Most things go in %eax.  */
6187 /* Override FP return register with %xmm0 for local functions when
6188    SSE math is enabled or for functions with sseregparm attribute.  */
6189 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6191 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6192 if ((sse_level >= 1 && mode == SFmode)
6193 || (sse_level == 2 && mode == DFmode))
6194 regno = FIRST_SSE_REG;
6197 /* OImode shouldn't be used directly.  */
6198 gcc_assert (mode != OImode);
6200 return gen_rtx_REG (orig_mode, regno);
/* NOTE(review): elided extraction -- return type line, the valtype
   parameter line, braces, and the switch that distinguishes the
   libcall mode classes are missing.  Tokens verbatim.  */
/* SysV x86-64: libcalls (NULL valtype) get a fixed register by mode
   class (XMM0 / st(0) / %rax per the visible returns); typed values go
   through construct_container, with an %rax fallback for zero-sized
   aggregates.  */
6204 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6209 /* Handle libcalls, which don't provide a type node.  */
6210 if (valtype == NULL)
6222 return gen_rtx_REG (mode, FIRST_SSE_REG);
6225 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6229 return gen_rtx_REG (mode, AX_REG);
6233 ret = construct_container (mode, orig_mode, valtype, 1,
6234 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6235 x86_64_int_return_registers, 0);
6237 /* For zero sized structures, construct_container returns NULL, but we
6238    need to keep rest of compiler happy by returning meaningful value.  */
6240 ret = gen_rtx_REG (orig_mode, AX_REG);
/* NOTE(review): elided extraction -- return type line, braces, the
   TARGET_SSE guard and the switch's other cases/default are missing.
   Tokens verbatim.  */
/* MS x64: return in %rax by default; 16-byte non-complex scalar/vector
   values and SF/DF floats come back in XMM0.  */
6246 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6248 unsigned int regno = AX_REG;
6252 switch (GET_MODE_SIZE (mode))
6255 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6256 && !COMPLEX_MODE_P (mode))
6257 regno = FIRST_SSE_REG;
6261 if (mode == SFmode || mode == DFmode)
6262 regno = FIRST_SSE_REG;
6268 return gen_rtx_REG (orig_mode, regno);
/* NOTE(review): elided extraction -- return type line, braces and the
   "fn = NULL" initialization are missing.  Tokens verbatim.  */
/* Common return-value dispatcher: normalizes FNTYPE_OR_DECL into a
   (fn, fntype) pair and routes to the MS-x64, SysV-x86-64 or 32-bit
   worker based on the callee's ABI.  */
6272 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6273 enum machine_mode orig_mode, enum machine_mode mode)
6275 const_tree fn, fntype;
6278 if (fntype_or_decl && DECL_P (fntype_or_decl))
6279 fn = fntype_or_decl;
6280 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6282 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6283 return function_value_ms_64 (orig_mode, mode);
6284 else if (TARGET_64BIT)
6285 return function_value_64 (orig_mode, mode, valtype);
6287 return function_value_32 (orig_mode, mode, fntype, fn);
/* NOTE(review): elided extraction -- return type line and braces are
   missing.  Tokens verbatim.  */
/* TARGET_FUNCTION_VALUE hook: canonicalizes VALTYPE's mode and
   delegates to ix86_function_value_1.  */
6291 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6292 bool outgoing ATTRIBUTE_UNUSED)
6294 enum machine_mode mode, orig_mode;
6296 orig_mode = TYPE_MODE (valtype);
6297 mode = type_natural_mode (valtype, NULL);
6298 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* NOTE(review): elided extraction -- return type line and braces are
   missing.  Tokens verbatim.  */
/* Libcall return value: no type node available, so pass NULL through
   to the common dispatcher with MODE used for both modes.  */
6302 ix86_libcall_value (enum machine_mode mode)
6304 return ix86_function_value_1 (NULL, NULL, mode, mode);
6307 /* Return true iff type is returned in memory.  */
/* NOTE(review): elided extraction -- braces, the size declaration line
   and several return statements (including the tail returning based on
   size) are missing.  Tokens verbatim.  */
/* 32-bit rules: small MS-style aggregates (<= 8 bytes) stay in
   registers; vectors return in MM0/XMM0/YMM0 only when the matching
   ISA extension is enabled, otherwise in memory.  */
6309 static int ATTRIBUTE_UNUSED
6310 return_in_memory_32 (const_tree type, enum machine_mode mode)
6314 if (mode == BLKmode)
6317 size = int_size_in_bytes (type);
6319 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6322 if (VECTOR_MODE_P (mode) || mode == TImode)
6324 /* User-created vectors small enough to fit in EAX.  */
6328 /* MMX/3dNow values are returned in MM0,
6329    except when it doesn't exits.  */
6331 return (TARGET_MMX ? 0 : 1);
6333 /* SSE values are returned in XMM0, except when it doesn't exist.  */
6335 return (TARGET_SSE ? 0 : 1);
6337 /* AVX values are returned in YMM0, except when it doesn't exist.  */
6339 return TARGET_AVX ? 0 : 1;
6348 /* OImode shouldn't be used directly.  */
6349 gcc_assert (mode != OImode);
/* NOTE(review): elided extraction -- braces are missing.  Tokens
   verbatim.  */
/* SysV x86-64: a value goes to memory exactly when examine_argument
   says it cannot be split across return registers.  */
6354 static int ATTRIBUTE_UNUSED
6355 return_in_memory_64 (const_tree type, enum machine_mode mode)
6357 int needed_intregs, needed_sseregs;
6358 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* NOTE(review): elided extraction -- braces and an intermediate return
   are missing.  Tokens verbatim.  */
/* MS x64: __m128-class values return in XMM0; everything else returns
   in a register only when its size is exactly 1, 2, 4 or 8 bytes.  */
6361 static int ATTRIBUTE_UNUSED
6362 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6364 HOST_WIDE_INT size = int_size_in_bytes (type);
6366 /* __m128 is returned in xmm0.  */
6367 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6368 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6371 /* Otherwise, the size must be exactly in [1248].  */
6372 return (size != 1 && size != 2 && size != 4 && size != 8);
/* NOTE(review): elided extraction -- return type line, braces, the
   #else/#endif structure around the subtarget hook and the TARGET_64BIT
   test are partially missing.  Tokens verbatim.  */
/* TARGET_RETURN_IN_MEMORY hook: defers to the subtarget macro when
   defined, otherwise dispatches on ABI to the workers above.  */
6376 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6378 #ifdef SUBTARGET_RETURN_IN_MEMORY
6379 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6381 const enum machine_mode mode = type_natural_mode (type, NULL);
6385 if (ix86_function_type_abi (fntype) == MS_ABI)
6386 return return_in_memory_ms_64 (type, mode);
6388 return return_in_memory_64 (type, mode);
6391 return return_in_memory_32 (type, mode);
6395 /* Return false iff TYPE is returned in memory.  This version is used
6396    on Solaris 10.  It is similar to the generic ix86_return_in_memory,
6397    but differs notably in that when MMX is available, 8-byte vectors
6398    are returned in memory, rather than in MMX registers.  */
/* NOTE(review): elided extraction -- return type line, braces, the size
   declaration and several returns (vector sub-cases, final size test)
   are missing.  Tokens verbatim.  */
6401 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6404 enum machine_mode mode = type_natural_mode (type, NULL);
6407 return return_in_memory_64 (type, mode);
6409 if (mode == BLKmode)
6412 size = int_size_in_bytes (type);
6414 if (VECTOR_MODE_P (mode))
6416 /* Return in memory only if MMX registers *are* available.  This
6417    seems backwards, but it is consistent with the existing
6424 else if (mode == TImode)
6426 else if (mode == XFmode)
6432 /* When returning SSE vector types, we have a choice of either
6433      (1) being abi incompatible with a -march switch, or
6434      (2) generating an error.
6435    Given no good solution, I think the safest thing is one warning.
6436    The user won't be able to use -Werror, but....
6438    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6439    called in response to actually generating a caller or callee that
6440    uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6441    via aggregate_value_p for general type probing from tree-ssa.  */
/* NOTE(review): elided extraction -- return type line, braces, part of
   the SSE-size condition and the final "return NULL" are missing.
   Tokens verbatim.  */
/* Emits the one-shot "vector return without ISA" warnings for 32-bit
   code; does not itself pick a struct-value register in this view.  */
6444 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot warning flags, distinct from the argument-passing ones in
   function_arg_32.  */
6446 static bool warnedsse, warnedmmx;
6448 if (!TARGET_64BIT && type)
6450 /* Look at the return type of the function, not the function type.  */
6451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6453 if (!TARGET_SSE && !warnedsse)
6456 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6459 warning (0, "SSE vector return without SSE enabled "
6464 if (!TARGET_MMX && !warnedmmx)
6466 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6469 warning (0, "MMX vector return without MMX enabled "
6479 /* Create the va_list data type.  */
6481 /* Returns the calling convention specific va_list date type.
6482    The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
/* NOTE(review): elided extraction -- return type line, braces and the
   pointer-type arguments of the f_ovf/f_sav field declarations are
   missing.  Tokens verbatim.  */
/* i386 and MS x64 use a plain char* va_list; SysV x86-64 builds the
   four-field record (gp_offset, fp_offset, overflow_arg_area,
   reg_save_area) and returns it wrapped in a one-element array type,
   per the SysV ABI.  */
6485 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6487 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6489 /* For i386 we use plain pointer to argument area.  */
6490 if (!TARGET_64BIT || abi == MS_ABI)
6491 return build_pointer_type (char_type_node);
6493 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6494 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6496 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6497 unsigned_type_node);
6498 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6499 unsigned_type_node);
6500 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6502 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the middle end can track how much of
   the register save area is actually used.  */
6505 va_list_gpr_counter_field = f_gpr;
6506 va_list_fpr_counter_field = f_fpr;
6508 DECL_FIELD_CONTEXT (f_gpr) = record;
6509 DECL_FIELD_CONTEXT (f_fpr) = record;
6510 DECL_FIELD_CONTEXT (f_ovf) = record;
6511 DECL_FIELD_CONTEXT (f_sav) = record;
6513 TREE_CHAIN (record) = type_decl;
6514 TYPE_NAME (record) = type_decl;
6515 TYPE_FIELDS (record) = f_gpr;
6516 TREE_CHAIN (f_gpr) = f_fpr;
6517 TREE_CHAIN (f_fpr) = f_ovf;
6518 TREE_CHAIN (f_ovf) = f_sav;
6520 layout_type (record);
6522 /* The correct type is an array type of one element.  */
6523 return build_array_type (record, build_index_type (size_zero_node));
6526 /* Setup the builtin va_list data type and for 64-bit the additional
6527    calling convention specific va_list data types.  */
/* NOTE(review): elided extraction -- return type line, braces, the
   TARGET_64BIT guard, "tree t" declaration, the else-branches' opening
   lines and the final "return ret" are missing.  Tokens verbatim.  */
/* Builds the default va_list for ix86_abi, then also materializes the
   other ABI's variant (sysv_va_list_type_node / ms_va_list_type_node)
   so __builtin_ms_va_list and __builtin_sysv_va_list both exist.
   Non-record types are wrapped in a variant copy so each gets a
   distinct type node.  */
6530 ix86_build_builtin_va_list (void)
6532 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6534 /* Initialize abi specific va_list builtin types.  */
6538 if (ix86_abi == MS_ABI)
6540 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6541 if (TREE_CODE (t) != RECORD_TYPE)
6542 t = build_variant_type_copy (t);
6543 sysv_va_list_type_node = t;
6548 if (TREE_CODE (t) != RECORD_TYPE)
6549 t = build_variant_type_copy (t);
6550 sysv_va_list_type_node = t;
6552 if (ix86_abi != MS_ABI)
6554 t = ix86_build_builtin_va_list_abi (MS_ABI);
6555 if (TREE_CODE (t) != RECORD_TYPE)
6556 t = build_variant_type_copy (t);
6557 ms_va_list_type_node = t;
6562 if (TREE_CODE (t) != RECORD_TYPE)
6563 t = build_variant_type_copy (t);
6564 ms_va_list_type_node = t;
6571 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
/* NOTE(review): elided extraction -- return type line, braces, the
   declarations of save_area/mem/label/label_ref/tmp_reg/nsse_reg/set/i,
   the early return, and parts of the address arithmetic are missing.
   Tokens verbatim.  */
/* SysV x86-64: spills the unnamed-argument registers into the va_list
   register save area.  GPRs are stored with plain moves; SSE registers
   are saved via the sse_prologue_save insn, reached through a computed
   jump keyed off AL (the hidden count of SSE registers actually used),
   so unused XMM saves are skipped at run time.  */
6574 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6583 int regparm = ix86_regparm;
6585 if (cum->call_abi != ix86_abi)
6586 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6588 /* GPR size of varargs save area.  */
6589 if (cfun->va_list_gpr_size)
6590 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6592 ix86_varargs_gpr_size = 0;
6594 /* FPR size of varargs save area.  We don't need it if we don't pass
6595    anything in SSE registers.  */
6596 if (cum->sse_nregs && cfun->va_list_fpr_size)
6597 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6599 ix86_varargs_fpr_size = 0;
6601 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6604 save_area = frame_pointer_rtx;
6605 set = get_varargs_alias_set ();
/* Save the integer registers that follow the last named argument.  */
6607 for (i = cum->regno;
6609 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6612 mem = gen_rtx_MEM (Pmode,
6613 plus_constant (save_area, i * UNITS_PER_WORD));
6614 MEM_NOTRAP_P (mem) = 1;
6615 set_mem_alias_set (mem, set);
6616 emit_move_insn (mem, gen_rtx_REG (Pmode,
6617 x86_64_int_parameter_registers[i]));
6620 if (ix86_varargs_fpr_size)
6622 /* Now emit code to save SSE registers.  The AX parameter contains number
6623    of SSE parameter registers used to call this function.  We use
6624    sse_prologue_save insn template that produces computed jump across
6625    SSE saves.  We need some preparation work to get this working.  */
6627 label = gen_label_rtx ();
6628 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6630 /* Compute address to jump to :
6631    label - eax*4 + nnamed_sse_arguments*4 Or
6632    label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6633 tmp_reg = gen_reg_rtx (Pmode);
6634 nsse_reg = gen_reg_rtx (Pmode);
6635 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6636 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6637 gen_rtx_MULT (Pmode, nsse_reg,
6640 /* vmovaps is one byte longer than movaps.  */
6642 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6643 gen_rtx_PLUS (Pmode, tmp_reg,
6649 gen_rtx_CONST (DImode,
6650 gen_rtx_PLUS (DImode,
6652 GEN_INT (cum->sse_regno
6653 * (TARGET_AVX ? 5 : 4)))));
6655 emit_move_insn (nsse_reg, label_ref);
6656 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6658 /* Compute address of memory block we save into.  We always use pointer
6659    pointing 127 bytes after first byte to store - this is needed to keep
6660    instruction size limited by 4 bytes (5 bytes for AVX) with one
6661    byte displacement.  */
6662 tmp_reg = gen_reg_rtx (Pmode);
6663 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6664 plus_constant (save_area,
6665 ix86_varargs_gpr_size + 127)));
6666 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6667 MEM_NOTRAP_P (mem) = 1;
6668 set_mem_alias_set (mem, set);
6669 set_mem_align (mem, BITS_PER_WORD);
6671 /* And finally do the dirty job!  */
6672 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6673 GEN_INT (cum->sse_regno), label));
/* NOTE(review): elided extraction -- return type line, braces, the
   mem/reg/i declarations and the loop body's opening brace are missing.
   Tokens verbatim.  */
/* MS x64: spill each remaining parameter register into its caller-
   allocated home slot above the incoming-args pointer (the shadow
   space), so va_arg can walk them as ordinary stack words.  */
6678 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6680 alias_set_type set = get_varargs_alias_set ();
6683 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6687 mem = gen_rtx_MEM (Pmode,
6688 plus_constant (virtual_incoming_args_rtx,
6689 i * UNITS_PER_WORD));
6690 MEM_NOTRAP_P (mem) = 1;
6691 set_mem_alias_set (mem, set);
6693 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6694 emit_move_insn (mem, reg);
/* NOTE(review): elided extraction -- return type line, braces, the
   no_rtl parameter, the TARGET_64BIT early return and the
   "next_cum = *cum" copy are missing.  Tokens verbatim.  */
/* TARGET_SETUP_INCOMING_VARARGS hook: copies CUM, advances it past the
   last named argument for stdarg functions, then dispatches to the
   MS-x64 or SysV-x86-64 save-area worker.  */
6699 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6700 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6703 CUMULATIVE_ARGS next_cum;
6706 /* This argument doesn't appear to be used anymore.  Which is good,
6707    because the old code here didn't suppress rtl generation.  */
6708 gcc_assert (!no_rtl);
6713 fntype = TREE_TYPE (current_function_decl);
6715 /* For varargs, we do not want to skip the dummy va_dcl argument.
6716    For stdargs, we do want to skip the last named argument.  */
6718 if (stdarg_p (fntype))
6719 function_arg_advance (&next_cum, mode, type, 1);
6721 if (cum->call_abi == MS_ABI)
6722 setup_incoming_varargs_ms_64 (&next_cum);
6724 setup_incoming_varargs_64 (&next_cum);
6727 /* Checks if TYPE is of kind va_list char *.  */
/* NOTE(review): elided extraction -- return type line, braces, the
   "canonic" declaration and the 32-bit "return true" are missing.
   Tokens verbatim.  */
/* True when TYPE is a plain-pointer va_list: always on 32-bit, and on
   64-bit when the canonical va_list type is the MS char* variant.  */
6730 is_va_list_char_pointer (tree type)
6734 /* For 32-bit it is always true.  */
6737 canonic = ix86_canonical_va_list_type (type);
6738 return (canonic == ms_va_list_type_node
6739 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6742 /* Implement va_start. */
/* Initialize the four fields of the SysV x86-64 va_list in VALIST:
   gp_offset / fp_offset (bytes already consumed in the register save
   area), overflow_arg_area (first stack-passed argument), and
   reg_save_area.  For 32-bit targets or char* va_lists this defers to
   the generic expander.  */
6745 ix86_va_start (tree valist, rtx nextarg)
6747 HOST_WIDE_INT words, n_gpr, n_fpr;
6748 tree f_gpr, f_fpr, f_ovf, f_sav;
6749 tree gpr, fpr, ovf, sav, t;
6752 /* Only 64bit target needs something special. */
6753 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6755 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the SysV va_list record type:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6759 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6760 f_fpr = TREE_CHAIN (f_gpr);
6761 f_ovf = TREE_CHAIN (f_fpr);
6762 f_sav = TREE_CHAIN (f_ovf);
6764 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6765 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6766 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6767 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6768 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6770 /* Count number of gp and fp argument registers used. */
6771 words = crtl->args.info.words;
6772 n_gpr = crtl->args.info.regno;
6773 n_fpr = crtl->args.info.sse_regno;
6775 if (cfun->va_list_gpr_size)
6777 type = TREE_TYPE (gpr);
/* gp_offset = n_gpr * 8: each GP register slot is 8 bytes.  */
6778 t = build2 (MODIFY_EXPR, type,
6779 gpr, build_int_cst (type, n_gpr * 8));
6780 TREE_SIDE_EFFECTS (t) = 1;
6781 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6784 if (TARGET_SSE && cfun->va_list_fpr_size)
6786 type = TREE_TYPE (fpr);
/* fp_offset: SSE slots are 16 bytes each and follow the
   8 * X86_64_REGPARM_MAX bytes of GP slots in the save area.  */
6787 t = build2 (MODIFY_EXPR, type, fpr,
6788 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6789 TREE_SIDE_EFFECTS (t) = 1;
6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6793 /* Find the overflow area. */
6794 type = TREE_TYPE (ovf);
6795 t = make_tree (type, crtl->args.internal_arg_pointer);
6797 t = build2 (POINTER_PLUS_EXPR, type, t,
6798 size_int (words * UNITS_PER_WORD));
6799 t = build2 (MODIFY_EXPR, type, ovf, t);
6800 TREE_SIDE_EFFECTS (t) = 1;
6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6803 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6805 /* Find the register save area.
6806 Prologue of the function save it right above stack frame. */
6807 type = TREE_TYPE (sav);
6808 t = make_tree (type, frame_pointer_rtx);
/* When no GP registers were saved, bias the base back so that the
   fp_offset computed above (which includes the GP area size) still
   lands on the saved SSE registers.  */
6809 if (!ix86_varargs_gpr_size)
6810 t = build2 (POINTER_PLUS_EXPR, type, t,
6811 size_int (-8 * X86_64_REGPARM_MAX));
6812 t = build2 (MODIFY_EXPR, type, sav, t);
6813 TREE_SIDE_EFFECTS (t) = 1;
6814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6818 /* Implement va_arg. */
/* Gimplify one va_arg fetch for the SysV x86-64 ABI.  Classifies TYPE
   via construct_container; if it can live in registers, emits a runtime
   test of gp_offset/fp_offset against the save-area limits and either
   pulls the value out of the register save area (copying piecewise into
   a temporary when the pieces are not contiguous) or falls through to
   the overflow (stack) area.  Returns the dereferenced result tree.  */
6821 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6824 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6825 tree f_gpr, f_fpr, f_ovf, f_sav;
6826 tree gpr, fpr, ovf, sav, t;
6828 tree lab_false, lab_over = NULL_TREE;
6833 enum machine_mode nat_mode;
6836 /* Only 64bit target needs something special. */
6837 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6838 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6840 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6841 f_fpr = TREE_CHAIN (f_gpr);
6842 f_ovf = TREE_CHAIN (f_fpr);
6843 f_sav = TREE_CHAIN (f_ovf);
6845 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6846 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6847 valist = build_va_arg_indirect_ref (valist);
6848 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6849 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6850 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Types passed by reference are fetched as a pointer and dereferenced
   at the end (see the indirect_p use after lab_over).  */
6852 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6854 type = build_pointer_type (type);
6855 size = int_size_in_bytes (type);
6856 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6858 nat_mode = type_natural_mode (type, NULL);
6867 /* Unnamed 256bit vector mode parameters are passed on stack. */
6868 if (ix86_cfun_abi () == SYSV_ABI)
6875 container = construct_container (nat_mode, TYPE_MODE (type),
6876 type, 0, X86_64_REGPARM_MAX,
6877 X86_64_SSE_REGPARM_MAX, intreg,
6882 /* Pull the value out of the saved registers. */
6884 addr = create_tmp_var (ptr_type_node, "addr");
6885 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6889 int needed_intregs, needed_sseregs;
6891 tree int_addr, sse_addr;
6893 lab_false = create_artificial_label ();
6894 lab_over = create_artificial_label ();
6896 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when over-aligned data cannot be addressed
   directly inside the (8-byte / 16-byte slotted) save area.  */
6898 need_temp = (!REG_P (container)
6899 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6900 || TYPE_ALIGN (type) > 128));
6902 /* In case we are passing structure, verify that it is consecutive block
6903 on the register save area. If not we need to do moves. */
6904 if (!need_temp && !REG_P (container))
6906 /* Verify that all registers are strictly consecutive */
6907 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6911 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6913 rtx slot = XVECEXP (container, 0, i);
/* SSE slots must be consecutive SSE regs at 16-byte strides.  */
6914 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6915 || INTVAL (XEXP (slot, 1)) != i * 16)
6923 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6925 rtx slot = XVECEXP (container, 0, i);
/* GP slots must be consecutive integer regs at 8-byte strides.  */
6926 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6927 || INTVAL (XEXP (slot, 1)) != i * 8)
6939 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6940 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6941 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6942 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6945 /* First ensure that we fit completely in registers. */
6948 t = build_int_cst (TREE_TYPE (gpr),
6949 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
/* if (gp_offset >= limit) goto lab_false;  — not enough GP regs left.  */
6950 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6951 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6952 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6953 gimplify_and_add (t, pre_p);
6957 t = build_int_cst (TREE_TYPE (fpr),
6958 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6959 + X86_64_REGPARM_MAX * 8);
/* Same overflow check for the SSE portion of the save area.  */
6960 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6963 gimplify_and_add (t, pre_p);
6966 /* Compute index to start of area used for integer regs. */
6969 /* int_addr = gpr + sav; */
6970 t = fold_convert (sizetype, gpr);
6971 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6972 gimplify_assign (int_addr, t, pre_p);
6976 /* sse_addr = fpr + sav; */
6977 t = fold_convert (sizetype, fpr);
6978 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6979 gimplify_assign (sse_addr, t, pre_p);
/* Non-contiguous pieces: assemble the value into a stack temporary,
   copying each container slot from its save-area location.  */
6984 tree temp = create_tmp_var (type, "va_arg_tmp");
6987 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6988 gimplify_assign (addr, t, pre_p);
6990 for (i = 0; i < XVECLEN (container, 0); i++)
6992 rtx slot = XVECEXP (container, 0, i);
6993 rtx reg = XEXP (slot, 0);
6994 enum machine_mode mode = GET_MODE (reg);
6995 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6996 tree addr_type = build_pointer_type (piece_type);
6997 tree daddr_type = build_pointer_type_for_mode (piece_type,
7001 tree dest_addr, dest;
7003 if (SSE_REGNO_P (REGNO (reg)))
7005 src_addr = sse_addr;
7006 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7010 src_addr = int_addr;
7011 src_offset = REGNO (reg) * 8;
7013 src_addr = fold_convert (addr_type, src_addr);
7014 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7015 size_int (src_offset));
7016 src = build_va_arg_indirect_ref (src_addr);
7018 dest_addr = fold_convert (daddr_type, addr);
7019 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7020 size_int (INTVAL (XEXP (slot, 1))));
7021 dest = build_va_arg_indirect_ref (dest_addr);
7023 gimplify_assign (dest, src, pre_p);
/* Consume the registers: bump gp_offset / fp_offset past what we used.  */
7029 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7030 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7031 gimplify_assign (gpr, t, pre_p);
7036 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7037 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7038 gimplify_assign (fpr, t, pre_p);
7041 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7043 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7046 /* ... otherwise out of the overflow area. */
7048 /* When we align parameter on stack for caller, if the parameter
7049 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7050 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7051 here with caller. */
7052 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7053 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7054 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7056 /* Care for on-stack alignment if needed. */
7057 if (arg_boundary <= 64
7058 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's boundary: (ovf + align-1) & -align.  */
7062 HOST_WIDE_INT align = arg_boundary / 8;
7063 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7064 size_int (align - 1));
7065 t = fold_convert (sizetype, t);
7066 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7068 t = fold_convert (TREE_TYPE (ovf), t);
7070 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7071 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the fetched argument.  */
7073 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7074 size_int (rsize * UNITS_PER_WORD));
7075 gimplify_assign (unshare_expr (ovf), t, pre_p);
7078 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7080 ptrtype = build_pointer_type (type);
7081 addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference arguments.  */
7084 addr = build_va_arg_indirect_ref (addr);
7085 return build_va_arg_indirect_ref (addr);
7088 /* Return nonzero if OPNUM's MEM should be matched
7089 in movabs* patterns. */
/* Digs the MEM out of INSN's SET operand OPNUM (looking through PARALLEL
   and SUBREG wrappers) and accepts it unless it is volatile while
   volatile_ok is clear.  */
7092 ix86_check_movabs (rtx insn, int opnum)
7096 set = PATTERN (insn);
7097 if (GET_CODE (set) == PARALLEL)
7098 set = XVECEXP (set, 0, 0);
7099 gcc_assert (GET_CODE (set) == SET);
7100 mem = XEXP (set, opnum);
7101 while (GET_CODE (mem) == SUBREG)
7102 mem = SUBREG_REG (mem);
7103 gcc_assert (MEM_P (mem));
7104 return (volatile_ok || !MEM_VOLATILE_P (mem));
7107 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with the five constants the x87 can
   load with a single instruction (fldlg2, fldln2, fldl2e, fldl2t, fldpi),
   rounded to XFmode, and sets the one-shot init flag.  */
7110 init_ext_80387_constants (void)
7112 static const char * cst[5] =
7114 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7115 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7116 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7117 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7118 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7122 for (i = 0; i < 5; i++)
7124 real_from_string (&ext_80387_constants_table[i], cst[i]);
7125 /* Ensure each constant is rounded to XFmode precision. */
7126 real_convert (&ext_80387_constants_table[i],
7127 XFmode, &ext_80387_constants_table[i]);
7130 ext_80387_constants_init = 1;
7133 /* Return true if the constant is something that can be loaded with
7134 a special instruction. */
/* Classifies CONST_DOUBLE X for x87 loads.  Returns small codes for
   0.0 and 1.0, table-based codes for the five extended constants, and
   codes for -0.0 / -1.0 (loaded as fldz/fld1 + fchs); return values
   are consumed by standard_80387_constant_opcode/_rtx below.  */
7137 standard_80387_constant_p (rtx x)
7139 enum machine_mode mode = GET_MODE (x);
7143 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7146 if (x == CONST0_RTX (mode))
7148 if (x == CONST1_RTX (mode))
7151 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7153 /* For XFmode constants, try to find a special 80387 instruction when
7154 optimizing for size or on those CPUs that benefit from them. */
7156 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7160 if (! ext_80387_constants_init)
7161 init_ext_80387_constants ();
7163 for (i = 0; i < 5; i++)
7164 if (real_identical (&r, &ext_80387_constants_table[i]))
7168 /* Load of the constant -0.0 or -1.0 will be split as
7169 fldz;fchs or fld1;fchs sequence. */
7170 if (real_isnegzero (&r))
7172 if (real_identical (&r, &dconstm1))
7178 /* Return the opcode of the special instruction to be used to load
/* Maps the classification code from standard_80387_constant_p to the
   corresponding x87 load mnemonic.  NOTE(review): the switch cases are
   not visible in this listing.  */
7182 standard_80387_constant_opcode (rtx x)
7184 switch (standard_80387_constant_p (x))
7208 /* Return the CONST_DOUBLE representing the 80387 constant that is
7209 loaded by the specified special instruction. The argument IDX
7210 matches the return value from standard_80387_constant_p. */
7213 standard_80387_constant_rtx (int idx)
/* Lazily initialize the shared constant table before indexing it.  */
7217 if (! ext_80387_constants_init)
7218 init_ext_80387_constants ();
7234 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7238 /* Return 1 if mode is a valid mode for sse. */
/* Predicate on MODE; body (mode switch) is elided in this listing.  */
7240 standard_sse_mode_p (enum machine_mode mode)
7257 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7258 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7259 modes and AVX is enabled. */
/* Negative returns (-2/-3) signal "all-ones but the required ISA
   extension is disabled", distinguishing them from plain failure.  */
7262 standard_sse_constant_p (rtx x)
7264 enum machine_mode mode = GET_MODE (x);
7266 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7268 if (vector_all_ones_operand (x, mode))
7270 if (standard_sse_mode_p (mode))
7271 return TARGET_SSE2 ? 2 : -2;
7272 else if (VALID_AVX256_REG_MODE (mode))
7273 return TARGET_AVX ? 3 : -3;
7279 /* Return the opcode of the special instruction to be used to load
/* Chooses the xor-zeroing or pcmpeq all-ones idiom for constant X,
   keyed on standard_sse_constant_p's code and the insn's attr mode;
   AVX targets get the three-operand v-prefixed forms.  */
7283 standard_sse_constant_opcode (rtx insn, rtx x)
7285 switch (standard_sse_constant_p (x))
/* All-zero constant: register-clearing xor of the destination.  */
7288 switch (get_attr_mode (insn))
7291 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7293 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7295 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7297 return "vxorps\t%x0, %x0, %x0";
7299 return "vxorpd\t%x0, %x0, %x0";
7301 return "vpxor\t%x0, %x0, %x0";
/* All-ones constant: compare a register with itself.  */
7307 switch (get_attr_mode (insn))
7312 return "vpcmpeqd\t%0, %0, %0";
7318 return "pcmpeqd\t%0, %0";
7323 /* Returns 1 if OP contains a symbol reference */
/* Recursively scans OP's RTL format string: 'E' entries (rtx vectors)
   and 'e' entries (sub-rtxes) are searched for SYMBOL_REF/LABEL_REF.  */
7326 symbolic_reference_mentioned_p (rtx op)
7331 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7334 fmt = GET_RTX_FORMAT (GET_CODE (op));
7335 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7341 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7342 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7346 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7353 /* Return 1 if it is appropriate to emit `ret' instructions in the
7354 body of a function. Do this only if the epilogue is simple, needing a
7355 couple of insns. Prior to reloading, we can't tell how many registers
7356 must be saved, so return 0 then. Return 0 if there is no frame
7357 marker to de-allocate. */
7360 ix86_can_use_return_insn_p (void)
7362 struct ix86_frame frame;
7364 if (! reload_completed || frame_pointer_needed)
7367 /* Don't allow more than 32 pop, since that's all we can do
7368 with one instruction. */
/* NOTE(review): the comment says 32 but the threshold compares
   crtl->args.size against 32768 — presumably bytes poppable by
   `ret N` (16-bit immediate); confirm against the insn pattern.  */
7369 if (crtl->args.pops_args
7370 && crtl->args.size >= 32768)
7373 ix86_compute_frame_layout (&frame);
7374 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7377 /* Value should be nonzero if functions must have frame pointers.
7378 Zero means the frame pointer need not be set up (and parms may
7379 be accessed via the stack pointer) in functions that seem suitable. */
7382 ix86_frame_pointer_required (void)
7384 /* If we accessed previous frames, then the generated code expects
7385 to be able to access the saved ebp value in our frame. */
7386 if (cfun->machine->accesses_prev_frame)
7389 /* Several x86 os'es need a frame pointer for other reasons,
7390 usually pertaining to setjmp. */
7391 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7394 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7395 the frame pointer by default. Turn it back on now if we've not
7396 got a leaf function. */
/* TLS-descriptor calls count as calls for leaf-ness purposes.  */
7397 if (TARGET_OMIT_LEAF_FRAME_POINTER
7398 && (!current_function_is_leaf
7399 || ix86_current_function_calls_tls_descriptor))
7408 /* Record that the current function accesses previous call frames. */
/* Sets the flag consulted by ix86_frame_pointer_required above.  */
7411 ix86_setup_frame_addresses (void)
7413 cfun->machine->accesses_prev_frame = 1;
/* PC-thunk emission strategy: use hidden link-once (COMDAT) sections
   when the assembler supports .hidden and one-only sections, or on
   Mach-O; otherwise fall back to local labels.  pic_labels_used is a
   bitmask of register numbers whose get-PC thunks must be emitted at
   end of file (see ix86_file_end).  */
7416 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7417 # define USE_HIDDEN_LINKONCE 1
7419 # define USE_HIDDEN_LINKONCE 0
7422 static int pic_labels_used;
7424 /* Fills in the label name that should be used for a pc thunk for
7425 the given register. */
/* Writes at most 32 bytes into NAME: the well-known
   "__i686.get_pc_thunk.<reg>" symbol when link-once thunks are used,
   otherwise an internal "LPR<regno>" label.  32-bit only.  */
7428 get_pc_thunk_name (char name[32], unsigned int regno)
7430 gcc_assert (!TARGET_64BIT);
7432 if (USE_HIDDEN_LINKONCE)
7433 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7435 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7439 /* This function generates code for -fpic that loads %ebx with
7440 the return address of the caller and then returns. */
/* End-of-file hook: for every register flagged in pic_labels_used,
   emit its get-PC thunk (mov (%esp),%reg; ret) into the appropriate
   section — Darwin coalesced text, a hidden link-once section, or
   plain text — then emit the executable-stack marker if required.  */
7443 ix86_file_end (void)
7448 for (regno = 0; regno < 8; ++regno)
/* Skip registers whose thunk was never requested.  */
7452 if (! ((pic_labels_used >> regno) & 1))
7455 get_pc_thunk_name (name, regno);
/* Darwin: weak, private-extern definition in the coalesced section.  */
7460 switch_to_section (darwin_sections[text_coal_section]);
7461 fputs ("\t.weak_definition\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n\t.private_extern\t", asm_out_file);
7464 assemble_name (asm_out_file, name);
7465 fputs ("\n", asm_out_file);
7466 ASM_OUTPUT_LABEL (asm_out_file, name);
7470 if (USE_HIDDEN_LINKONCE)
/* ELF: public, one-only, hidden function so duplicate thunks across
   translation units are merged by the linker.  */
7474 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7476 TREE_PUBLIC (decl) = 1;
7477 TREE_STATIC (decl) = 1;
7478 DECL_ONE_ONLY (decl) = 1;
7480 (*targetm.asm_out.unique_section) (decl, 0);
7481 switch_to_section (get_named_section (decl, NULL, 0));
7483 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7484 fputs ("\t.hidden\t", asm_out_file);
7485 assemble_name (asm_out_file, name);
7486 fputc ('\n', asm_out_file);
7487 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7491 switch_to_section (text_section);
7492 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (top of stack) and return.  */
7495 xops[0] = gen_rtx_REG (Pmode, regno);
7496 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7497 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7498 output_asm_insn ("ret", xops);
7501 if (NEED_INDICATE_EXEC_STACK)
7502 file_end_indicate_exec_stack ();
7505 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT base into DEST: a direct GOTT
   lookup on VxWorks RTP, an inline call/pop (or mov) sequence when deep
   branch prediction is not a concern, or a call to the per-register
   get-PC thunk otherwise, followed by the _GLOBAL_OFFSET_TABLE_ add.  */
7508 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7514 if (TARGET_VXWORKS_RTP && flag_pic)
7516 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7517 xops[2] = gen_rtx_MEM (Pmode,
7518 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7519 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7521 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7522 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7523 an unadorned address. */
7524 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7525 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7526 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7530 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7532 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* Inline form: either mov the label address directly (non-PIC) or
   call the next instruction and pop the return address.  */
7534 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7537 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7539 output_asm_insn ("call\t%a2", xops);
7542 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7543 is what will be referenced by the Mach-O PIC subsystem. */
7545 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7548 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7549 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7552 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk form: record that this register's thunk must be emitted by
   ix86_file_end, then call it.  */
7557 get_pc_thunk_name (name, REGNO (dest));
7558 pic_labels_used |= 1 << REGNO (dest);
7560 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7561 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7562 output_asm_insn ("call\t%X2", xops);
7563 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7564 is what will be referenced by the Mach-O PIC subsystem. */
7567 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7569 targetm.asm_out.internal_label (asm_out_file, "L",
7570 CODE_LABEL_NUMBER (label));
/* Finally add _GLOBAL_OFFSET_TABLE_ (relative form when the PC was
   obtained via call/pop at label %a2).  */
7577 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7578 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7580 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7585 /* Generate an "push" pattern for input ARG. */
/* Builds a SET of a pre-decremented stack-pointer MEM, i.e. the RTL
   form of a push.  NOTE(review): the function signature line is elided
   in this listing; the SET source (ARG) is also not visible here.  */
7590 return gen_rtx_SET (VOIDmode,
7592 gen_rtx_PRE_DEC (Pmode,
7593 stack_pointer_rtx)),
7597 /* Return >= 0 if there is an unused call-clobbered register available
7598 for the entire function. */
/* Only leaf, unprofiled, non-TLS-descriptor functions qualify; scans
   the first three hard registers (eax/ecx/edx) for one that is never
   live and is not the DRAP register.  Returns INVALID_REGNUM on
   failure.  */
7601 ix86_select_alt_pic_regnum (void)
7603 if (current_function_is_leaf && !crtl->profile
7604 && !ix86_current_function_calls_tls_descriptor)
7607 /* Can't use the same register for both PIC and DRAP. */
7609 drap = REGNO (crtl->drap_reg);
7612 for (i = 2; i >= 0; --i)
7613 if (i != drap && !df_regs_ever_live_p (i))
7617 return INVALID_REGNUM;
7620 /* Return 1 if we need to save REGNO. */
/* Decides whether the prologue must spill hard register REGNO: special
   cases for the PIC register (unless an alternate PIC reg is free), the
   EH return data registers when MAYBE_EH_RETURN, and the DRAP register;
   otherwise the usual live && !call-used && !fixed rule, excluding the
   hard frame pointer when it is already established.  */
7622 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7624 if (pic_offset_table_rtx
7625 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7626 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7628 || crtl->calls_eh_return
7629 || crtl->uses_const_pool))
/* A free alternate PIC register means ebx need not be saved here.  */
7631 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7636 if (crtl->calls_eh_return && maybe_eh_return)
7641 unsigned test = EH_RETURN_DATA_REGNO (i);
7642 if (test == INVALID_REGNUM)
7650 && regno == REGNO (crtl->drap_reg))
7653 return (df_regs_ever_live_p (regno)
7654 && !call_used_regs[regno]
7655 && !fixed_regs[regno]
7656 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7659 /* Return number of saved general prupose registers. */
/* Counts hard registers for which ix86_save_reg is true, excluding
   SSE registers (those are counted separately below).  */
7662 ix86_nsaved_regs (void)
7667 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7668 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7673 /* Return number of saved SSE registrers. */
/* Only the MS ABI treats some SSE registers as callee-saved; other
   ABIs always report zero here.  */
7676 ix86_nsaved_sseregs (void)
7681 if (ix86_cfun_abi () != MS_ABI)
7683 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7684 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7689 /* Given FROM and TO register numbers, say whether this elimination is
7690 allowed. If stack alignment is needed, we can only replace argument
7691 pointer with hard frame pointer, or replace frame pointer with stack
7692 pointer. Otherwise, frame pointer elimination is automatically
7693 handled and all other eliminations are valid. */
7696 ix86_can_eliminate (int from, int to)
/* Under -fstack-realign with a realigned frame pointer, only the two
   eliminations named in the comment above are representable.  */
7698 if (stack_realign_fp)
7699 return ((from == ARG_POINTER_REGNUM
7700 && to == HARD_FRAME_POINTER_REGNUM)
7701 || (from == FRAME_POINTER_REGNUM
7702 && to == STACK_POINTER_REGNUM))
7704 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7707 /* Return the offset between two registers, one to be eliminated, and the other
7708 its replacement, at the start of a routine. */
/* Computes the frame layout and returns the appropriate offset field
   (or difference of fields) for the FROM->TO elimination pair; any pair
   other than the four supported ones trips the gcc_asserts.  */
7711 ix86_initial_elimination_offset (int from, int to)
7713 struct ix86_frame frame;
7714 ix86_compute_frame_layout (&frame);
7716 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7717 return frame.hard_frame_pointer_offset;
7718 else if (from == FRAME_POINTER_REGNUM
7719 && to == HARD_FRAME_POINTER_REGNUM)
7720 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7723 gcc_assert (to == STACK_POINTER_REGNUM);
7725 if (from == ARG_POINTER_REGNUM)
7726 return frame.stack_pointer_offset;
7728 gcc_assert (from == FRAME_POINTER_REGNUM);
7729 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7733 /* In a dynamically-aligned function, we can't know the offset from
7734 stack pointer to frame pointer, so we must ensure that setjmp
7735 eliminates fp against the hard fp (%ebp) rather than trying to
7736 index from %esp up to the top of the frame across a gap that is
7737 of unknown (at compile-time) size. */
7739 ix86_builtin_setjmp_frame_value (void)
7741 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7744 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes the full stack-frame layout for the current function:
   register counts, padding amounts, the frame/hard-frame/stack pointer
   offsets, the allocation size for the prologue, and red-zone usage.
   Layout (growing downward from the return address): saved GP regs,
   16-byte-aligned SSE save area, varargs save area, padding1, locals,
   outgoing args, padding2.  */
7747 ix86_compute_frame_layout (struct ix86_frame *frame)
7749 HOST_WIDE_INT total_size;
7750 unsigned int stack_alignment_needed;
7751 HOST_WIDE_INT offset;
7752 unsigned int preferred_alignment;
7753 HOST_WIDE_INT size = get_frame_size ();
7755 frame->nregs = ix86_nsaved_regs ();
7756 frame->nsseregs = ix86_nsaved_sseregs ();
7759 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7760 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7762 /* MS ABI seem to require stack alignment to be always 16 except for function
7764 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7766 preferred_alignment = 16;
7767 stack_alignment_needed = 16;
/* Keep the crtl fields consistent with the forced 16-byte values.  */
7768 crtl->preferred_stack_boundary = 128;
7769 crtl->stack_alignment_needed = 128;
7772 gcc_assert (!size || stack_alignment_needed);
7773 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7774 gcc_assert (preferred_alignment <= stack_alignment_needed);
7776 /* During reload iteration the amount of registers saved can change.
7777 Recompute the value as needed. Do not recompute when amount of registers
7778 didn't change as reload does multiple calls to the function and does not
7779 expect the decision to change within single iteration. */
7780 if (!optimize_function_for_size_p (cfun)
7781 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7783 int count = frame->nregs;
7785 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7786 /* The fast prologue uses move instead of push to save registers. This
7787 is significantly longer, but also executes faster as modern hardware
7788 can execute the moves in parallel, but can't do that for push/pop.
7790 Be careful about choosing what prologue to emit: When function takes
7791 many instructions to execute we may use slow version as well as in
7792 case function is known to be outside hot spot (this is known with
7793 feedback only). Weight the size of function by number of registers
7794 to save as it is cheap to use one or two push instructions but very
7795 slow to use many of them. */
7797 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7798 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7799 || (flag_branch_probabilities
7800 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7801 cfun->machine->use_fast_prologue_epilogue = false;
7803 cfun->machine->use_fast_prologue_epilogue
7804 = !expensive_function_p (count);
7806 if (TARGET_PROLOGUE_USING_MOVE
7807 && cfun->machine->use_fast_prologue_epilogue)
7808 frame->save_regs_using_mov = true;
7810 frame->save_regs_using_mov = false;
7813 /* Skip return address and saved base pointer. */
7814 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7816 frame->hard_frame_pointer_offset = offset;
7818 /* Set offset to aligned because the realigned frame starts from
7820 if (stack_realign_fp)
7821 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7823 /* Register save area */
7824 offset += frame->nregs * UNITS_PER_WORD;
7826 /* Align SSE reg save area. */
/* SSE saves are 16-byte entities and must be 16-byte aligned.  */
7827 if (frame->nsseregs)
7828 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7830 frame->padding0 = 0;
7832 /* SSE register save area. */
7833 offset += frame->padding0 + frame->nsseregs * 16;
7836 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7837 offset += frame->va_arg_size;
7839 /* Align start of frame for local function. */
7840 frame->padding1 = ((offset + stack_alignment_needed - 1)
7841 & -stack_alignment_needed) - offset;
7843 offset += frame->padding1;
7845 /* Frame pointer points here. */
7846 frame->frame_pointer_offset = offset;
7850 /* Add outgoing arguments area. Can be skipped if we eliminated
7851 all the function calls as dead code.
7852 Skipping is however impossible when function calls alloca. Alloca
7853 expander assumes that last crtl->outgoing_args_size
7854 of stack frame are unused. */
7855 if (ACCUMULATE_OUTGOING_ARGS
7856 && (!current_function_is_leaf || cfun->calls_alloca
7857 || ix86_current_function_calls_tls_descriptor))
7859 offset += crtl->outgoing_args_size;
7860 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7863 frame->outgoing_arguments_size = 0;
7865 /* Align stack boundary. Only needed if we're calling another function
7867 if (!current_function_is_leaf || cfun->calls_alloca
7868 || ix86_current_function_calls_tls_descriptor)
7869 frame->padding2 = ((offset + preferred_alignment - 1)
7870 & -preferred_alignment) - offset;
7872 frame->padding2 = 0;
7874 offset += frame->padding2;
7876 /* We've reached end of stack frame. */
7877 frame->stack_pointer_offset = offset;
7879 /* Size prologue needs to allocate. */
7880 frame->to_allocate =
7881 (size + frame->padding1 + frame->padding2
7882 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames fall back to push-based saves; 64-bit frames >= 2GB
   cannot use mov with a 32-bit displacement.  */
7884 if ((!frame->to_allocate && frame->nregs <= 1)
7885 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7886 frame->save_regs_using_mov = false;
7888 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7889 && current_function_is_leaf
7890 && !ix86_current_function_calls_tls_descriptor)
/* Leaf functions with an unchanging SP may keep data in the red zone
   below the stack pointer instead of allocating it.  */
7892 frame->red_zone_size = frame->to_allocate;
7893 if (frame->save_regs_using_mov)
7894 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7895 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7896 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7899 frame->red_zone_size = 0;
7900 frame->to_allocate -= frame->red_zone_size;
7901 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (appears to be conditionally
   compiled; the guard is elided in this listing).  */
7903 fprintf (stderr, "\n");
7904 fprintf (stderr, "size: %ld\n", (long)size);
7905 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7906 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7907 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7908 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7909 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7910 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7911 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7912 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7913 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7914 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7915 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7916 (long)frame->hard_frame_pointer_offset);
7917 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7918 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7919 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7920 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7924 /* Emit code to save registers in the prologue. */
/* Pushes every to-be-saved GP register, iterating from high regno to
   low, and marks each push frame-related for DWARF CFI.  */
7927 ix86_emit_save_regs (void)
7932 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7933 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7935 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7936 RTX_FRAME_RELATED_P (insn) = 1;
7940 /* Emit code to save registers using MOV insns. First register
7941 is restored from POINTER + OFFSET. */
/* Mov-based (fast-prologue) variant: stores each to-be-saved GP
   register to successive word slots at POINTER + OFFSET.  */
7943 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7949 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7951 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7953 gen_rtx_REG (Pmode, regno));
7954 RTX_FRAME_RELATED_P (insn) = 1;
7955 offset += UNITS_PER_WORD;
7959 /* Emit code to save registers using MOV insns. First register
7960 is restored from POINTER + OFFSET. */
/* SSE counterpart of the above: stores each to-be-saved SSE register
   as a TImode value into 16-byte-aligned slots at POINTER + OFFSET.  */
7962 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7971 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7972 set_mem_align (mem, 128);
7973 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7974 RTX_FRAME_RELATED_P (insn) = 1;
7979 /* Expand prologue or epilogue stack adjustment.
7980 The pattern exist to put a dependency on all ebp-based memory accesses.
7981 STYLE should be negative if instructions should be marked as frame related,
7982 zero if %r11 register is live and cannot be freely used and positive
/* Emits DEST = SRC + OFFSET using the special adjust-stack patterns.
   Offsets that do not fit a 64-bit immediate operand are first loaded
   into %r11 (only legal when STYLE permits clobbering it).  */
7986 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7992 else if (x86_64_immediate_operand (offset, DImode))
7993 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7997 /* r11 is used by indirect sibcall return as well, set before the
7998 epilogue and used after the epilogue. ATM indirect sibcall
7999 shouldn't be used together with huge frame sizes in one
8000 function because of the frame_size check in sibcall.c. */
8002 r11 = gen_rtx_REG (DImode, R11_REG);
8003 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8005 RTX_FRAME_RELATED_P (insn) = 1;
8006 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8010 RTX_FRAME_RELATED_P (insn) = 1;
8013 /* Find an available register to be used as dynamic realign argument
8014 pointer register.  Such a register will be written in prologue and
8015 used in begin of body, so it must not be
8016 1. parameter passing register.
8018 We reuse static-chain register if it is available.  Otherwise, we
8019 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8022 Return: the regno of chosen register. */
8025 find_drap_reg (void)
8027 tree decl = cfun->decl;
/* 64-bit branch (the 32-bit branch follows below).  */
8031 /* Use R13 for nested function or function need static chain.
8032 Since function with tail call may use any caller-saved
8033 registers in epilogue, DRAP must not use caller-saved
8034 register in such case. */
8035 if ((decl_function_context (decl)
8036 && !DECL_NO_STATIC_CHAIN (decl))
8037 || crtl->tail_call_emit)
8044 /* Use DI for nested function or function need static chain.
8045 Since function with tail call may use any caller-saved
8046 registers in epilogue, DRAP must not use caller-saved
8047 register in such case. */
8048 if ((decl_function_context (decl)
8049 && !DECL_NO_STATIC_CHAIN (decl))
8050 || crtl->tail_call_emit)
8053 /* Reuse static chain register if it isn't used for parameter
/* ECX (the static-chain reg) is only free when regparm <= 2 and the
   function is not fastcall (fastcall passes args in ECX).  */
8055 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8056 && !lookup_attribute ("fastcall",
8057 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8064 /* Update incoming stack boundary and estimated stack alignment. */
8067 ix86_update_stack_boundary (void)
8069 /* Prefer the one specified at command line. */
8070 ix86_incoming_stack_boundary
8071 = (ix86_user_incoming_stack_boundary
8072 ? ix86_user_incoming_stack_boundary
8073 : ix86_default_incoming_stack_boundary)
8075 /* Incoming stack alignment can be changed on individual functions
8076 via force_align_arg_pointer attribute.  We use the smallest
8077 incoming stack boundary. */
8078 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8079 && lookup_attribute (ix86_force_align_arg_pointer_string,
8080 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8081 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8083 /* The incoming stack frame has to be aligned at least at
8084 parm_stack_boundary. */
8085 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8086 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8088 /* Stack at entrance of main is aligned by runtime.  We use the
8089 smallest incoming stack boundary. */
8090 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8091 && DECL_NAME (current_function_decl)
8092 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8093 && DECL_FILE_SCOPE_P (current_function_decl))
8094 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8096 /* x86_64 vararg needs 16byte stack alignment for register save
/* Raise the estimate to 128 bits so the SSE register-save area in
   the va_list setup is properly aligned.  */
8100 && crtl->stack_alignment_estimated < 128)
8101 crtl->stack_alignment_estimated = 128;
8104 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8105 needed or an rtx for DRAP otherwise. */
8108 ix86_get_drap_rtx (void)
/* Force DRAP whenever -mforce-drap is given or outgoing args are
   pushed (not accumulated), since the arg pointer must be
   recoverable then.  */
8110 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8111 crtl->need_drap = true;
8113 if (stack_realign_drap)
8115 /* Assign DRAP to vDRAP and returns vDRAP */
8116 unsigned int regno = find_drap_reg ();
8121 arg_ptr = gen_rtx_REG (Pmode, regno);
8122 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP reg into a pseudo (vDRAP) right after the
   function entry so the body can use it freely.  */
8125 drap_vreg = copy_to_reg (arg_ptr);
8129 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8130 RTX_FRAME_RELATED_P (insn) = 1;
8137 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Incoming arguments
   are always addressed via the virtual incoming-args pointer; any
   DRAP substitution happens later via TARGET_GET_DRAP_RTX.  */
8140 ix86_internal_arg_pointer (void)
8142 return virtual_incoming_args_rtx;
8145 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8146 This is called from dwarf2out.c to emit call frame instructions
8147 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8149 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8151 rtx unspec = SET_SRC (pattern);
8152 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the unspec code to the matching dwarf2out helper.  */
8156 case UNSPEC_REG_SAVE:
8157 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8158 SET_DEST (pattern));
8160 case UNSPEC_DEF_CFA:
/* Redefine the CFA as SET_DEST + constant offset from the unspec.  */
8161 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8162 INTVAL (XVECEXP (unspec, 0, 0)));
8169 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8170 to be generated in correct form. */
8172 ix86_finalize_stack_realign_flags (void)
8174 /* Check if stack realign is really needed after reload, and
8175 stores result in cfun */
8176 unsigned int incoming_stack_boundary
8177 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8178 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
/* Leaf functions only need alignment actually used by stack slots;
   non-leaf functions must honor the full needed alignment.  */
8179 unsigned int stack_realign = (incoming_stack_boundary
8180 < (current_function_is_leaf
8181 ? crtl->max_used_stack_slot_alignment
8182 : crtl->stack_alignment_needed));
8184 if (crtl->stack_realign_finalized)
8186 /* After stack_realign_needed is finalized, we can no longer
8188 gcc_assert (crtl->stack_realign_needed == stack_realign);
8192 crtl->stack_realign_needed = stack_realign;
8193 crtl->stack_realign_finalized = true;
8197 /* Expand the prologue into a bunch of separate insns.  Phases:
   DRAP setup + realignment, frame pointer setup, fp-based
   realignment, register saves (push or mov), stack allocation
   (direct or probed), PIC register setup, and misc fixups.  */
8200 ix86_expand_prologue (void)
8204 struct ix86_frame frame;
8205 HOST_WIDE_INT allocate;
8207 ix86_finalize_stack_realign_flags ();
8209 /* DRAP should not coexist with stack_realign_fp */
8210 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8212 ix86_compute_frame_layout (&frame);
8214 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8215 of DRAP is needed and stack realignment is really needed after reload */
8216 if (crtl->drap_reg && crtl->stack_realign_needed)
8219 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8220 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8221 ? 0 : UNITS_PER_WORD);
8223 gcc_assert (stack_realign_drap);
8225 /* Grab the argument pointer. */
8226 x = plus_constant (stack_pointer_rtx,
8227 (UNITS_PER_WORD + param_ptr_offset));
8230 /* Only need to push parameter pointer reg if it is caller
8232 if (!call_used_regs[REGNO (crtl->drap_reg)])
8234 /* Push arg pointer reg */
8235 insn = emit_insn (gen_push (y));
8236 RTX_FRAME_RELATED_P (insn) = 1;
8239 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8240 RTX_FRAME_RELATED_P (insn) = 1;
8242 /* Align the stack. */
8243 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8245 GEN_INT (-align_bytes)));
8246 RTX_FRAME_RELATED_P (insn) = 1;
8248 /* Replicate the return address on the stack so that return
8249 address can be reached via (argp - 1) slot.  This is needed
8250 to implement macro RETURN_ADDR_RTX and intrinsic function
8251 expand_builtin_return_addr etc. */
8253 x = gen_frame_mem (Pmode,
8254 plus_constant (x, -UNITS_PER_WORD));
8255 insn = emit_insn (gen_push (x));
8256 RTX_FRAME_RELATED_P (insn) = 1;
8259 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8260 slower on all targets.  Also sdb doesn't like it. */
8262 if (frame_pointer_needed)
8264 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8265 RTX_FRAME_RELATED_P (insn) = 1;
8267 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8268 RTX_FRAME_RELATED_P (insn) = 1;
8271 if (stack_realign_fp)
8273 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8274 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8276 /* Align the stack. */
8277 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8279 GEN_INT (-align_bytes)));
8280 RTX_FRAME_RELATED_P (insn) = 1;
/* Total allocation: local frame + SSE save area + its padding;
   integer register saves are added below.  */
8283 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8285 if (!frame.save_regs_using_mov)
8286 ix86_emit_save_regs ();
8288 allocate += frame.nregs * UNITS_PER_WORD;
8290 /* When using red zone we may start register saving before allocating
8291 the stack frame saving one cycle of the prologue.  However I will
8292 avoid doing this if I am going to have to probe the stack since
8293 at least on x86_64 the stack probe can turn into a call that clobbers
8294 a red zone location */
8295 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8296 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8297 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8298 && !crtl->stack_realign_needed)
8299 ? hard_frame_pointer_rtx
8300 : stack_pointer_rtx,
8301 -frame.nregs * UNITS_PER_WORD);
/* Stack allocation: direct sub when small or no probing needed,
   otherwise the probing worker below.  */
8305 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8306 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8307 GEN_INT (-allocate), -1);
8310 /* Only valid for Win32. */
8311 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8315 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8317 if (cfun->machine->call_abi == MS_ABI)
/* EAX is clobbered by the probing worker; preserve it by pushing
   when it is live at function start.  */
8320 eax_live = ix86_eax_live_at_start_p ();
8324 emit_insn (gen_push (eax));
8325 allocate -= UNITS_PER_WORD;
8328 emit_move_insn (eax, GEN_INT (allocate));
8331 insn = gen_allocate_stack_worker_64 (eax, eax);
8333 insn = gen_allocate_stack_worker_32 (eax, eax);
8334 insn = emit_insn (insn);
8335 RTX_FRAME_RELATED_P (insn) = 1;
/* The worker hides the SP adjustment; attach an explicit
   REG_FRAME_RELATED_EXPR so CFI reflects sp -= allocate.  */
8336 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8337 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8338 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
/* Reload the saved EAX value from its stack slot.  */
8342 if (frame_pointer_needed)
8343 t = plus_constant (hard_frame_pointer_rtx,
8346 - frame.nregs * UNITS_PER_WORD);
8348 t = plus_constant (stack_pointer_rtx, allocate);
8349 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Register saves with MOV, for the non-red-zone ordering (after
   the frame has been allocated).  */
8353 if (frame.save_regs_using_mov
8354 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8355 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8357 if (!frame_pointer_needed
8358 || !frame.to_allocate
8359 || crtl->stack_realign_needed)
8360 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8362 + frame.nsseregs * 16 + frame.padding0);
8364 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8365 -frame.nregs * UNITS_PER_WORD);
8367 if (!frame_pointer_needed
8368 || !frame.to_allocate
8369 || crtl->stack_realign_needed)
8370 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8373 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8374 - frame.nregs * UNITS_PER_WORD
8375 - frame.nsseregs * 16
/* PIC register setup: pick an alternate reg when possible,
   otherwise materialize the GOT pointer.  */
8378 pic_reg_used = false;
8379 if (pic_offset_table_rtx
8380 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8383 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8385 if (alt_pic_reg_used != INVALID_REGNUM)
8386 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8388 pic_reg_used = true;
8395 if (ix86_cmodel == CM_LARGE_PIC)
8397 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8398 rtx label = gen_label_rtx ();
8400 LABEL_PRESERVE_P (label) = 1;
8401 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8402 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8403 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8404 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8405 pic_offset_table_rtx, tmp_reg));
8408 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8411 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8414 /* In the pic_reg_used case, make sure that the got load isn't deleted
8415 when mcount needs it.  Blockage to avoid call movement across mcount
8416 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8418 if (crtl->profile && pic_reg_used)
8419 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8421 if (crtl->drap_reg && !crtl->stack_realign_needed)
8423 /* vDRAP is setup but after reload it turns out stack realign
8424 isn't necessary, here we will emit prologue to setup DRAP
8425 without stack realign adjustment */
8426 int drap_bp_offset = UNITS_PER_WORD * 2;
8427 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8428 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8431 /* Prevent instructions from being scheduled into register save push
8432 sequence when access to the redzone area is done through frame pointer.
8433 The offset between the frame pointer and the stack pointer is calculated
8434 relative to the value of the stack pointer at the end of the function
8435 prologue, and moving instructions that access redzone area via frame
8436 pointer inside push sequence violates this assumption. */
8437 if (frame_pointer_needed && frame.red_zone_size)
8438 emit_insn (gen_memory_blockage ());
8440 /* Emit cld instruction if stringops are used in the function. */
8441 if (TARGET_CLD && ix86_current_function_needs_cld)
8442 emit_insn (gen_cld ());
8445 /* Emit code to restore saved registers using MOV insns.  First register
8446 is restored from POINTER + OFFSET. */
8448 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8449 int maybe_eh_return)
8452 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8454 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8455 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8457 /* Ensure that adjust_address won't be forced to produce pointer
8458 out of range allowed by x86-64 instruction set. */
8459 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a signed 32-bit displacement: fold it into
   r11 and rebase subsequent accesses from there.  */
8463 r11 = gen_rtx_REG (DImode, R11_REG);
8464 emit_move_insn (r11, GEN_INT (offset));
8465 emit_insn (gen_adddi3 (r11, r11, pointer));
8466 base_address = gen_rtx_MEM (Pmode, r11);
8469 emit_move_insn (gen_rtx_REG (Pmode, regno),
8470 adjust_address (base_address, Pmode, offset));
8471 offset += UNITS_PER_WORD;
8475 /* Emit code to restore saved SSE registers using MOV insns.  First register
8476 is restored from POINTER + OFFSET; each slot is 16 bytes.  */
8478 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8479 int maybe_eh_return)
8482 rtx base_address = gen_rtx_MEM (TImode, pointer);
8485 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8486 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8488 /* Ensure that adjust_address won't be forced to produce pointer
8489 out of range allowed by x86-64 instruction set. */
8490 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Same r11 rebasing trick as the integer restore path.  */
8494 r11 = gen_rtx_REG (DImode, R11_REG);
8495 emit_move_insn (r11, GEN_INT (offset));
8496 emit_insn (gen_adddi3 (r11, r11, pointer));
8497 base_address = gen_rtx_MEM (TImode, r11);
/* 128-bit aligned TImode load back into the SSE register.  */
8500 mem = adjust_address (base_address, TImode, offset);
8501 set_mem_align (mem, 128);
8502 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8507 /* Restore function stack, frame, and registers.  STYLE: 1 = normal
   return, 0 = sibcall epilogue (no ret emitted), 2 = eh_return.  */
8510 ix86_expand_epilogue (int style)
8514 struct ix86_frame frame;
8515 HOST_WIDE_INT offset;
8517 ix86_finalize_stack_realign_flags ();
8519 /* When stack is realigned, SP must be valid. */
8520 sp_valid = (!frame_pointer_needed
8521 || current_function_sp_is_unchanging
8522 || stack_realign_fp);
8524 ix86_compute_frame_layout (&frame);
8526 /* See the comment about red zone and frame
8527 pointer usage in ix86_expand_prologue. */
8528 if (frame_pointer_needed && frame.red_zone_size)
8529 emit_insn (gen_memory_blockage ());
8531 /* Calculate start of saved registers relative to ebp.  Special care
8532 must be taken for the normal return case of a function using
8533 eh_return: the eax and edx registers are marked as saved, but not
8534 restored along this path. */
8535 offset = frame.nregs;
8536 if (crtl->calls_eh_return && style != 2)
8538 offset *= -UNITS_PER_WORD;
8539 offset -= frame.nsseregs * 16 + frame.padding0;
8541 /* If we're only restoring one register and sp is not valid then
8542 using a move instruction to restore the register since it's
8543 less work than reloading sp and popping the register.
8545 The default code result in stack adjustment using add/lea instruction,
8546 while this code results in LEAVE instruction (or discrete equivalent),
8547 so it is profitable in some other cases as well.  Especially when there
8548 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8549 and there is exactly one register to pop.  This heuristic may need some
8550 tuning in future. */
8551 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8552 || (TARGET_EPILOGUE_USING_MOVE
8553 && cfun->machine->use_fast_prologue_epilogue
8554 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8555 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8556 || (frame_pointer_needed && TARGET_USE_LEAVE
8557 && cfun->machine->use_fast_prologue_epilogue
8558 && (frame.nregs + frame.nsseregs) == 1)
8559 || crtl->calls_eh_return)
8561 /* Restore registers.  We can use ebp or esp to address the memory
8562 locations.  If both are available, default to ebp, since offsets
8563 are known to be small.  Only exception is esp pointing directly
8564 to the end of block of saved registers, where we may simplify
8567 If we are realigning stack with bp and sp, regs restore can't
8568 be addressed by bp.  sp must be used instead. */
8570 if (!frame_pointer_needed
8571 || (sp_valid && !frame.to_allocate)
8572 || stack_realign_fp)
8574 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8575 frame.to_allocate, style == 2);
8576 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8578 + frame.nsseregs * 16
8579 + frame.padding0, style == 2);
8583 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8584 offset, style == 2);
8585 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8587 + frame.nsseregs * 16
8588 + frame.padding0, style == 2);
8591 /* eh_return epilogues need %ecx added to the stack pointer. */
8594 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8596 /* Stack align doesn't work with eh_return. */
8597 gcc_assert (!crtl->stack_realign_needed);
8599 if (frame_pointer_needed)
/* sp = (bp + stackadj) + one word (skipping the saved bp), then
   reload bp itself from its save slot.  */
8601 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8602 tmp = plus_constant (tmp, UNITS_PER_WORD);
8603 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8605 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8606 emit_move_insn (hard_frame_pointer_rtx, tmp);
8608 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8613 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8614 tmp = plus_constant (tmp, (frame.to_allocate
8615 + frame.nregs * UNITS_PER_WORD
8616 + frame.nsseregs * 16
8618 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8621 else if (!frame_pointer_needed)
8622 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8623 GEN_INT (frame.to_allocate
8624 + frame.nregs * UNITS_PER_WORD
8625 + frame.nsseregs * 16
8628 /* If not an i386, mov & pop is faster than "leave". */
8629 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8630 || !cfun->machine->use_fast_prologue_epilogue)
8631 emit_insn ((*ix86_gen_leave) ());
8634 pro_epilogue_adjust_stack (stack_pointer_rtx,
8635 hard_frame_pointer_rtx,
8638 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based path: deallocate the frame, then POP saved regs.  */
8643 /* First step is to deallocate the stack frame so that we can
8646 If we realign stack with frame pointer, then stack pointer
8647 won't be able to recover via lea $offset(%bp), %sp, because
8648 there is a padding area between bp and sp for realign.
8649 "add $to_allocate, %sp" must be used instead. */
8652 gcc_assert (frame_pointer_needed);
8653 gcc_assert (!stack_realign_fp);
8654 pro_epilogue_adjust_stack (stack_pointer_rtx,
8655 hard_frame_pointer_rtx,
8656 GEN_INT (offset), style);
8657 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8658 frame.to_allocate, style == 2);
8659 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8660 GEN_INT (frame.nsseregs * 16), style);
8662 else if (frame.to_allocate || frame.nsseregs)
8664 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8667 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8668 GEN_INT (frame.to_allocate
8669 + frame.nsseregs * 16
8670 + frame.padding0), style);
8673 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8674 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8675 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8676 if (frame_pointer_needed)
8678 /* Leave results in shorter dependency chains on CPUs that are
8679 able to grok it fast. */
8680 if (TARGET_USE_LEAVE)
8681 emit_insn ((*ix86_gen_leave) ());
8684 /* For stack realigned really happens, recover stack
8685 pointer to hard frame pointer is a must, if not using
8687 if (stack_realign_fp)
8688 pro_epilogue_adjust_stack (stack_pointer_rtx,
8689 hard_frame_pointer_rtx,
8691 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo DRAP: pop back to the caller's stack pointer and restore the
   DRAP register if it was callee-saved.  */
8696 if (crtl->drap_reg && crtl->stack_realign_needed)
8698 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8699 ? 0 : UNITS_PER_WORD);
8700 gcc_assert (stack_realign_drap);
8701 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8703 GEN_INT (-(UNITS_PER_WORD
8704 + param_ptr_offset))));
8705 if (!call_used_regs[REGNO (crtl->drap_reg)])
8706 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8710 /* Sibcall epilogues don't want a return instruction. */
8714 if (crtl->args.pops_args && crtl->args.size)
8716 rtx popc = GEN_INT (crtl->args.pops_args);
8718 /* i386 can only pop 64K bytes.  If asked to pop more, pop
8719 return address, do explicit add, and jump indirectly to the
8722 if (crtl->args.pops_args >= 65536)
8724 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8726 /* There is no "pascal" calling convention in any 64bit ABI. */
8727 gcc_assert (!TARGET_64BIT);
8729 emit_insn (gen_popsi1 (ecx));
8730 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8731 emit_jump_insn (gen_return_indirect_internal (ecx));
8734 emit_jump_insn (gen_return_pop_internal (popc));
8737 emit_jump_insn (gen_return_internal ());
8740 /* Reset from the function's potential modifications.  Restores the
   PIC register's canonical regno (the prologue may have renamed it)
   and, on Mach-O, pads with a NOP when the function would otherwise
   end at a deleted-label note.  */
8743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8744 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8746 if (pic_offset_table_rtx)
8747 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8749 /* Mach-O doesn't support labels at the end of objects, so if
8750 it looks like we might want one, insert a NOP. */
8752 rtx insn = get_last_insn ();
/* Skip trailing notes other than deleted labels.  */
8755 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8756 insn = PREV_INSN (insn);
8760 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8761 fputs ("\tnop\n", file);
8767 /* Extract the parts of an RTL expression that is a valid memory address
8768 for an instruction.  Return 0 if the structure of the address is
8769 grossly off.  Return -1 if the address contains ASHIFT, so it is not
8770 strictly valid, but still used for computing length of lea instruction. */
8773 ix86_decompose_address (rtx addr, struct ix86_address *out)
8775 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8776 rtx base_reg, index_reg;
8777 HOST_WIDE_INT scale = 1;
8778 rtx scale_rtx = NULL_RTX;
8780 enum ix86_address_seg seg = SEG_DEFAULT;
8782 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8784 else if (GET_CODE (addr) == PLUS)
/* Flatten a (possibly nested) PLUS chain into addends[].  */
8794 addends[n++] = XEXP (op, 1);
8797 while (GET_CODE (op) == PLUS);
/* NOTE(review): after the addends[] fills above, valid indices are
   0 .. n-1; starting this loop at i = n appears to read addends[n]
   out of bounds -- upstream GCC uses "i = n - 1".  Confirm against
   the elided lines before changing.  */
8802 for (i = n; i >= 0; --i)
8805 switch (GET_CODE (op))
8810 index = XEXP (op, 0);
8811 scale_rtx = XEXP (op, 1);
/* %fs/%gs-relative TLS access via UNSPEC_TP becomes a segment
   override rather than a base/index component.  */
8815 if (XINT (op, 1) == UNSPEC_TP
8816 && TARGET_TLS_DIRECT_SEG_REFS
8817 && seg == SEG_DEFAULT)
8818 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8847 else if (GET_CODE (addr) == MULT)
8849 index = XEXP (addr, 0); /* index*scale */
8850 scale_rtx = XEXP (addr, 1);
8852 else if (GET_CODE (addr) == ASHIFT)
8856 /* We're called for lea too, which implements ashift on occasion. */
8857 index = XEXP (addr, 0);
8858 tmp = XEXP (addr, 1);
8859 if (!CONST_INT_P (tmp))
8861 scale = INTVAL (tmp);
/* Shift counts 0..3 correspond to legal scales 1/2/4/8.  */
8862 if ((unsigned HOST_WIDE_INT) scale > 3)
8868 disp = addr; /* displacement */
8870 /* Extract the integral value of scale. */
8873 if (!CONST_INT_P (scale_rtx))
8875 scale = INTVAL (scale_rtx);
8878 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8879 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8881 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8882 if (base_reg && index_reg && scale == 1
8883 && (index_reg == arg_pointer_rtx
8884 || index_reg == frame_pointer_rtx
8885 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index so sp/bp/argp lands in the base position.  */
8888 tmp = base, base = index, index = tmp;
8889 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8892 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8893 if ((base_reg == hard_frame_pointer_rtx
8894 || base_reg == frame_pointer_rtx
8895 || base_reg == arg_pointer_rtx) && !disp)
8898 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8899 Avoid this by transforming to [%esi+0].
8900 Reload calls address legitimization without cfun defined, so we need
8901 to test cfun for being non-NULL. */
8902 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8903 && base_reg && !index_reg && !disp
8905 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8908 /* Special case: encode reg+reg instead of reg*2.
   (The "scale &&" below is redundant -- scale == 2 implies nonzero.)  */
8909 if (!base && index && scale && scale == 2)
8910 base = index, base_reg = index_reg, scale = 1;
8912 /* Special case: scaling cannot be encoded without base or displacement. */
8913 if (!base && !disp && index && scale != 1)
8925 /* Return cost of the memory address x.
8926 For i386, it is better to use a complex address than let gcc copy
8927 the address into a reg and make a new pseudo.  But not if the address
8928 requires to two regs - that would mean more pseudos with longer
8931 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8933 struct ix86_address parts;
8935 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the pseudo/hard-reg tests below see the regs.  */
8939 if (parts.base && GET_CODE (parts.base) == SUBREG)
8940 parts.base = SUBREG_REG (parts.base);
8941 if (parts.index && GET_CODE (parts.index) == SUBREG)
8942 parts.index = SUBREG_REG (parts.index);
8944 /* Attempt to minimize number of registers in the address. */
8946 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8948 && (!REG_P (parts.index)
8949 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* Penalize addresses needing two distinct not-yet-allocated regs.  */
8953 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8955 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8956 && parts.base != parts.index)
8959 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8960 since it's predecode logic can't detect the length of instructions
8961 and it degenerates to vector decoded.  Increase cost of such
8962 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8963 to split such addresses or even refuse such addresses at all.
8965 Following addressing modes are affected:
8970 The first and last case may be avoidable by explicitly coding the zero in
8971 memory address, but I don't have AMD-K6 machine handy to check this
8975 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8976 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8977 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8983 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8984 this is used for to form addresses to local data when -fPIC is in
/* True iff DISP is the Mach-O picbase-relative offset unspec.  */
8988 darwin_local_data_pic (rtx disp)
8990 return (GET_CODE (disp) == UNSPEC
8991 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8994 /* Determine if a given RTX is a valid constant.  We already know this
8995 satisfies CONSTANT_P. */
8998 legitimate_constant_p (rtx x)
9000 switch (GET_CODE (x))
/* CONST wrapper: peel SYMBOL+INT, Mach-O picbase offsets, and the
   handful of TLS unspecs that are encodable as immediates.  */
9005 if (GET_CODE (x) == PLUS)
9007 if (!CONST_INT_P (XEXP (x, 1)))
9012 if (TARGET_MACHO && darwin_local_data_pic (x))
9015 /* Only some unspecs are valid as "constants". */
9016 if (GET_CODE (x) == UNSPEC)
9017 switch (XINT (x, 1))
9022 return TARGET_64BIT;
9025 x = XVECEXP (x, 0, 0);
9026 return (GET_CODE (x) == SYMBOL_REF
9027 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9029 x = XVECEXP (x, 0, 0);
9030 return (GET_CODE (x) == SYMBOL_REF
9031 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9036 /* We must have drilled down to a symbol. */
9037 if (GET_CODE (x) == LABEL_REF)
9039 if (GET_CODE (x) != SYMBOL_REF)
9044 /* TLS symbols are never valid. */
9045 if (SYMBOL_REF_TLS_MODEL (x))
9048 /* DLLIMPORT symbols are never valid. */
9049 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9050 && SYMBOL_REF_DLLIMPORT_P (x))
/* Vector/TImode constants: only the standard SSE constants
   (zero, all-ones) are directly encodable.  */
9055 if (GET_MODE (x) == TImode
9056 && x != CONST0_RTX (TImode)
9062 if (!standard_sse_constant_p (x))
9069 /* Otherwise we handle everything else in the move patterns. */
9073 /* Determine if it's legal to put X into the constant pool.  This
9074 is not possible for the address of thread-local symbols, which
9075 is checked above. */
9078 ix86_cannot_force_const_mem (rtx x)
9080 /* We can always put integral constants and vectors in memory. */
9081 switch (GET_CODE (x))
/* Everything else defers to legitimate_constant_p: anything that is
   not a legitimate constant must not be forced into the pool.  */
9091 return !legitimate_constant_p (x);
9094 /* Determine if a given RTX is a valid constant address: it must both
   satisfy CONSTANT_P and pass strict address legitimacy.  */
9097 constant_address_p (rtx x)
9099 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9102 /* Nonzero if the constant value X is a legitimate general operand
9103 when generating PIC code.  It is given that flag_pic is on and
9104 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9107 legitimate_pic_operand_p (rtx x)
9111 switch (GET_CODE (x))
/* CONST: strip an outer SYMBOL+INT sum, then inspect the unspec.  */
9114 inner = XEXP (x, 0);
9115 if (GET_CODE (inner) == PLUS
9116 && CONST_INT_P (XEXP (inner, 1)))
9117 inner = XEXP (inner, 0);
9119 /* Only some unspecs are valid as "constants". */
9120 if (GET_CODE (inner) == UNSPEC)
9121 switch (XINT (inner, 1))
9126 return TARGET_64BIT;
9128 x = XVECEXP (inner, 0, 0);
9129 return (GET_CODE (x) == SYMBOL_REF
9130 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9131 case UNSPEC_MACHOPIC_OFFSET:
9132 return legitimate_pic_address_disp_p (x);
/* Bare SYMBOL_REF/LABEL_REF fall through to the displacement check.  */
9140 return legitimate_pic_address_disp_p (x);
9147 /* Determine if a given CONST RTX is a valid memory displacement
/* in PIC mode.  64-bit: allow direct refs to local, non-TLS symbols
   within +/-16MB; otherwise require a GOT/GOTOFF/TLS unspec.  */
9151 legitimate_pic_address_disp_p (rtx disp)
9155 /* In 64bit mode we can allow direct addresses of symbols and labels
9156 when they are not dynamic symbols. */
9159 rtx op0 = disp, op1;
9161 switch (GET_CODE (disp))
9167 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9169 op0 = XEXP (XEXP (disp, 0), 0);
9170 op1 = XEXP (XEXP (disp, 0), 1);
/* RIP-relative addends must stay within +/-16MB.  */
9171 if (!CONST_INT_P (op1)
9172 || INTVAL (op1) >= 16*1024*1024
9173 || INTVAL (op1) < -16*1024*1024)
9175 if (GET_CODE (op0) == LABEL_REF)
9177 if (GET_CODE (op0) != SYMBOL_REF)
9182 /* TLS references should always be enclosed in UNSPEC. */
9183 if (SYMBOL_REF_TLS_MODEL (op0))
9185 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9186 && ix86_cmodel != CM_LARGE_PIC)
9194 if (GET_CODE (disp) != CONST)
9196 disp = XEXP (disp, 0);
/* 64-bit: only GOT-relative unspecs survive past this point.  */
9200 /* We are unsafe to allow PLUS expressions.  This limit allowed distance
9201 of GOT tables.  We should not need these anyway. */
9202 if (GET_CODE (disp) != UNSPEC
9203 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9204 && XINT (disp, 1) != UNSPEC_GOTOFF
9205 && XINT (disp, 1) != UNSPEC_PLTOFF))
9208 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9209 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: strip an UNSPEC+INT sum, then validate the unspec kind.  */
9215 if (GET_CODE (disp) == PLUS)
9217 if (!CONST_INT_P (XEXP (disp, 1)))
9219 disp = XEXP (disp, 0);
9223 if (TARGET_MACHO && darwin_local_data_pic (disp))
9226 if (GET_CODE (disp) != UNSPEC)
9229 switch (XINT (disp, 1))
9234 /* We need to check for both symbols and labels because VxWorks loads
9235 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9237 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9238 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9240 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9241 While ABI specify also 32bit relocation but we don't produce it in
9242 small PIC model at all. */
9243 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9244 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9246 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9248 case UNSPEC_GOTTPOFF:
9249 case UNSPEC_GOTNTPOFF:
9250 case UNSPEC_INDNTPOFF:
/* TLS unspecs: the wrapped symbol's TLS model must match the
   relocation kind.  */
9253 disp = XVECEXP (disp, 0, 0);
9254 return (GET_CODE (disp) == SYMBOL_REF
9255 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9257 disp = XVECEXP (disp, 0, 0);
9258 return (GET_CODE (disp) == SYMBOL_REF
9259 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9261 disp = XVECEXP (disp, 0, 0);
9262 return (GET_CODE (disp) == SYMBOL_REF
9263 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9269 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9270 memory address for an instruction. The MODE argument is the machine mode
9271 for the MEM expression that wants to use this address.
9273 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9274 convert common non-canonical forms to canonical form so that they will
/* Validate ADDR as a memory address for MODE.  STRICT selects strict
   register checking (hard-reg classes) vs. non-strict (pseudos allowed).
   NOTE(review): this extract is elided; the failure gotos/labels and the
   final TRUE/FALSE returns are not visible.  REASON/REASON_RTX are set on
   each visible rejection path, presumably consumed by elided debug code.  */
9278 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9279 rtx addr, int strict)
9281 struct ix86_address parts;
9282 rtx base, index, disp;
9283 HOST_WIDE_INT scale;
9284 const char *reason = NULL;
9285 rtx reason_rtx = NULL_RTX;
/* Split ADDR into base + index*scale + disp; reject if undecomposable.  */
9287 if (ix86_decompose_address (addr, &parts) <= 0)
9289 reason = "decomposition failed";
9294 index = parts.index;
9296 scale = parts.scale;
9298 /* Validate base register.
9300 Don't allow SUBREG's that span more than a word here. It can lead to spill
9301 failures when the base is one word out of a two word structure, which is
9302 represented internally as a DImode int. */
/* NOTE(review): the plain-REG case is elided; only the SUBREG-of-REG
   acceptance path is visible here.  */
9311 else if (GET_CODE (base) == SUBREG
9312 && REG_P (SUBREG_REG (base))
9313 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9315 reg = SUBREG_REG (base);
9318 reason = "base is not a register";
9322 if (GET_MODE (base) != Pmode)
9324 reason = "base is not in Pmode";
9328 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9329 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9331 reason = "base is not valid";
9336 /* Validate index register.
9338 Don't allow SUBREG's that span more than a word here -- same as above. */
9347 else if (GET_CODE (index) == SUBREG
9348 && REG_P (SUBREG_REG (index))
9349 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9351 reg = SUBREG_REG (index);
9354 reason = "index is not a register";
9358 if (GET_MODE (index) != Pmode)
9360 reason = "index is not in Pmode";
9364 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9365 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9367 reason = "index is not valid";
/* Scale must come with an index and be one of the hardware multipliers
   (1 implicit; 2, 4, 8 explicit).  */
9372 /* Validate scale factor. */
9375 reason_rtx = GEN_INT (scale);
9378 reason = "scale without index";
9382 if (scale != 2 && scale != 4 && scale != 8)
9384 reason = "scale is not a valid multiplier";
9389 /* Validate displacement. */
/* PIC-flavored displacements: a CONST-wrapped UNSPEC (other than Darwin's
   MACHOPIC offset) is vetted per unspec kind; GOT/GOTOFF fall through to
   the is_legitimate_pic checks below (label itself elided).  */
9394 if (GET_CODE (disp) == CONST
9395 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9396 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9397 switch (XINT (XEXP (disp, 0), 1))
9399 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9400 used. While ABI specify also 32bit relocations, we don't produce
9401 them at all and use IP relative instead. */
9404 gcc_assert (flag_pic);
9406 goto is_legitimate_pic;
9407 reason = "64bit address unspec";
9410 case UNSPEC_GOTPCREL:
9411 gcc_assert (flag_pic);
9412 goto is_legitimate_pic;
9414 case UNSPEC_GOTTPOFF:
9415 case UNSPEC_GOTNTPOFF:
9416 case UNSPEC_INDNTPOFF:
9422 reason = "invalid address unspec";
/* NOTE(review): condition is elided mid-expression; the visible parts
   gate on Darwin indirection handling for symbolic constants.  */
9426 else if (SYMBOLIC_CONST (disp)
9430 && MACHOPIC_INDIRECT
9431 && !machopic_operand_p (disp)
/* is_legitimate_pic section: on 64-bit, a reg-relative PIC disp is only
   OK in the foo@dtpoff(%rX) shape (UNSPEC_DTPOFF/NTPOFF plus const).  */
9437 if (TARGET_64BIT && (index || base))
9439 /* foo@dtpoff(%rX) is ok. */
9440 if (GET_CODE (disp) != CONST
9441 || GET_CODE (XEXP (disp, 0)) != PLUS
9442 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9443 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9444 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9445 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9447 reason = "non-constant pic memory reference";
9451 else if (! legitimate_pic_address_disp_p (disp))
9453 reason = "displacement is an invalid pic construct";
9457 /* This code used to verify that a symbolic pic displacement
9458 includes the pic_offset_table_rtx register.
9460 While this is good idea, unfortunately these constructs may
9461 be created by "adds using lea" optimization for incorrect
9470 This code is nonsensical, but results in addressing
9471 GOT table with pic_offset_table_rtx base. We can't
9472 just refuse it easily, since it gets matched by
9473 "addsi3" pattern, that later gets split to lea in the
9474 case output register differs from input. While this
9475 can be handled by separate addsi pattern for this case
9476 that never results in lea, this seems to be easier and
9477 correct fix for crash to disable this test. */
/* Non-PIC displacement: must be a label, const int, or a constant-
   legitimate CONST/SYMBOL_REF; on 64-bit it must also fit the sign-
   extended 32-bit immediate window.  */
9479 else if (GET_CODE (disp) != LABEL_REF
9480 && !CONST_INT_P (disp)
9481 && (GET_CODE (disp) != CONST
9482 || !legitimate_constant_p (disp))
9483 && (GET_CODE (disp) != SYMBOL_REF
9484 || !legitimate_constant_p (disp)))
9486 reason = "displacement is not constant";
9489 else if (TARGET_64BIT
9490 && !x86_64_immediate_operand (disp, VOIDmode))
9492 reason = "displacement is out of range";
9497 /* Everything looks valid. */
9504 /* Return a unique alias set for the GOT. */
9506 static alias_set_type
9507 ix86_GOT_alias_set (void)
/* Lazily create one alias set shared by all GOT memory references.
   NOTE(review): the first-call guard (presumably "if (set == -1)") and
   the "return set;" are elided from this extract.  */
9509 static alias_set_type set = -1;
9511 set = new_alias_set ();
9515 /* Return a legitimate reference for ORIG (an address) using the
9516 register REG. If REG is 0, a new pseudo is generated.
9518 There are two types of references that must be handled:
9520 1. Global data references must load the address from the GOT, via
9521 the PIC reg. An insn is emitted to do this load, and the reg is
9524 2. Static data references, constant pool addresses, and code labels
9525 compute the address as an offset from the GOT, whose base is in
9526 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9527 differentiate them from global data objects. The returned
9528 address is the PIC reg + an unspec constant.
9530 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9531 reg also appears in the address. */
/* Rewrite address ORIG into a PIC-legitimate form, using REG as scratch
   (a fresh pseudo is made when REG is 0).  See the block comment above.
   NOTE(review): this extract is elided -- local declarations (addr,
   new_rtx, base, ...), several braces and the final return are not
   visible; comments below cover only the visible branches.  */
9534 legitimize_pic_address (rtx orig, rtx reg)
/* Darwin 32-bit: delegate entirely to the generic Mach-O machinery.  */
9541 if (TARGET_MACHO && !TARGET_64BIT)
9544 reg = gen_reg_rtx (Pmode);
9545 /* Use the generic Mach-O PIC machinery. */
9546 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit, already-legitimate displacement: nothing to do (body elided).  */
9550 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit medium/large PIC @GOTOFF: wrap the symbol in UNSPEC_GOTOFF and
   add the PIC register, keeping any constant offset outside the unspec.  */
9552 else if (TARGET_64BIT
9553 && ix86_cmodel != CM_SMALL_PIC
9554 && gotoff_operand (addr, Pmode))
9557 /* This symbol may be referenced via a displacement from the PIC
9558 base address (@GOTOFF). */
9560 if (reload_in_progress)
9561 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9562 if (GET_CODE (addr) == CONST)
9563 addr = XEXP (addr, 0);
9564 if (GET_CODE (addr) == PLUS)
9566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9568 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9571 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9572 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9574 tmpreg = gen_reg_rtx (Pmode);
9577 emit_move_insn (tmpreg, new_rtx);
9581 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9582 tmpreg, 1, OPTAB_DIRECT);
9585 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF: same unspec construction, result is PIC reg + const.  */
9587 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9589 /* This symbol may be referenced via a displacement from the PIC
9590 base address (@GOTOFF). */
9592 if (reload_in_progress)
9593 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9594 if (GET_CODE (addr) == CONST)
9595 addr = XEXP (addr, 0);
9596 if (GET_CODE (addr) == PLUS)
9598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9605 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9609 emit_move_insn (reg, new_rtx);
/* Global (non-TLS) symbols and, on VxWorks, text labels: load the address
   from the GOT.  dllimport symbols are redirected to __imp_* first.  */
9613 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9614 /* We can't use @GOTOFF for text labels on VxWorks;
9615 see gotoff_operand. */
9616 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9618 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9620 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9621 return legitimize_dllimport_symbol (addr, true);
9622 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9624 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9626 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9627 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative GOT load via @GOTPCREL.  */
9631 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9634 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9635 new_rtx = gen_const_mem (Pmode, new_rtx);
9636 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9639 reg = gen_reg_rtx (Pmode);
9640 /* Use directly gen_movsi, otherwise the address is loaded
9641 into register for CSE. We don't want to CSE this addresses,
9642 instead we CSE addresses from the GOT table, so skip this. */
9643 emit_insn (gen_movsi (reg, new_rtx));
/* Otherwise: classic @GOT load through the PIC register.  */
9648 /* This symbol must be referenced via a load from the
9649 Global Offset Table (@GOT). */
9651 if (reload_in_progress)
9652 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9654 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9656 new_rtx = force_reg (Pmode, new_rtx);
9657 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9662 reg = gen_reg_rtx (Pmode);
9663 emit_move_insn (reg, new_rtx);
/* Fallback: constants and CONST expressions.  */
9669 if (CONST_INT_P (addr)
9670 && !x86_64_immediate_operand (addr, VOIDmode))
9674 emit_move_insn (reg, addr);
9678 new_rtx = force_reg (Pmode, addr);
9680 else if (GET_CODE (addr) == CONST)
9682 addr = XEXP (addr, 0);
9684 /* We must match stuff we generate before. Assume the only
9685 unspecs that can get here are ours. Not that we could do
9686 anything with them anyway.... */
9687 if (GET_CODE (addr) == UNSPEC
9688 || (GET_CODE (addr) == PLUS
9689 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9691 gcc_assert (GET_CODE (addr) == PLUS);
9693 if (GET_CODE (addr) == PLUS)
9695 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9697 /* Check first to see if this is a constant offset from a @GOTOFF
9698 symbol reference. */
9699 if (gotoff_operand (op0, Pmode)
9700 && CONST_INT_P (op1))
9704 if (reload_in_progress)
9705 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9706 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9708 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9709 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9710 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9714 emit_move_insn (reg, new_rtx);
/* 64-bit: offsets outside the +/-16MB window get forced to registers
   (elided guard; presumably the TARGET_64BIT branch).  */
9720 if (INTVAL (op1) < -16*1024*1024
9721 || INTVAL (op1) >= 16*1024*1024)
9723 if (!x86_64_immediate_operand (op1, Pmode))
9724 op1 = force_reg (Pmode, op1);
9725 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize each side recursively, then recombine,
   keeping any constant term outermost.  */
9731 base = legitimize_pic_address (XEXP (addr, 0), reg);
9732 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9733 base == reg ? NULL_RTX : reg);
9735 if (CONST_INT_P (new_rtx))
9736 new_rtx = plus_constant (base, INTVAL (new_rtx));
9739 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9741 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9742 new_rtx = XEXP (new_rtx, 1);
9744 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9752 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Build (unspec [const0] UNSPEC_TP), the thread-pointer rtx; when TO_REG
   is nonzero, copy it into a fresh pseudo and return that instead.
   NOTE(review): the early "if (!to_reg) return tp;" and final return are
   elided from this extract.  */
9755 get_thread_pointer (int to_reg)
9759 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9763 reg = gen_reg_rtx (Pmode);
9764 insn = gen_rtx_SET (VOIDmode, reg, tp);
9765 insn = emit_insn (insn);
9770 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9771 false if we expect this to be used for a memory address and true if
9772 we expect to load the address into a register. */
/* Expand a TLS symbol reference X under access MODEL.  FOR_MOV is true
   when the result feeds a move rather than a memory address (see the
   block comment above).  NOTE(review): this extract is elided -- the
   switch head, several case labels and returns are not visible.  */
9775 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9777 rtx dest, base, off, pic, tp;
/* General dynamic: call __tls_get_addr (via rax on 64-bit, wrapped as a
   const libcall block so CSE can share it).  */
9782 case TLS_MODEL_GLOBAL_DYNAMIC:
9783 dest = gen_reg_rtx (Pmode);
9784 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9786 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9788 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9791 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9792 insns = get_insns ();
9795 RTL_CONST_CALL_P (insns) = 1;
9796 emit_libcall_block (insns, dest, rax, x);
9798 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9799 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9801 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC) variant additionally adds the thread pointer and
   records a REG_EQUIV note for the symbol.  */
9803 if (TARGET_GNU2_TLS)
9805 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9807 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local dynamic: get the module base once, then add @DTPOFF per symbol.  */
9811 case TLS_MODEL_LOCAL_DYNAMIC:
9812 base = gen_reg_rtx (Pmode);
9813 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9815 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9817 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9820 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9821 insns = get_insns ();
9824 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9825 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9826 RTL_CONST_CALL_P (insns) = 1;
9827 emit_libcall_block (insns, base, rax, note);
9829 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9830 emit_insn (gen_tls_local_dynamic_base_64 (base));
9832 emit_insn (gen_tls_local_dynamic_base_32 (base));
9834 if (TARGET_GNU2_TLS)
9836 rtx x = ix86_tls_module_base ();
9838 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9839 gen_rtx_MINUS (Pmode, x, tp));
9842 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9843 off = gen_rtx_CONST (Pmode, off);
9845 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9847 if (TARGET_GNU2_TLS)
9849 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9851 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial exec: load the TP offset from the GOT (@GOTTPOFF family),
   then add it to the thread pointer (or subtract on legacy 32-bit).
   NOTE(review): the guards selecting the first two branches (64-bit PIC
   vs. 32-bit flag_pic) are elided.  */
9856 case TLS_MODEL_INITIAL_EXEC:
9860 type = UNSPEC_GOTNTPOFF;
9864 if (reload_in_progress)
9865 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9866 pic = pic_offset_table_rtx;
9867 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9869 else if (!TARGET_ANY_GNU_TLS)
9871 pic = gen_reg_rtx (Pmode);
9872 emit_insn (gen_set_got (pic));
9873 type = UNSPEC_GOTTPOFF;
9878 type = UNSPEC_INDNTPOFF;
9881 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9882 off = gen_rtx_CONST (Pmode, off);
9884 off = gen_rtx_PLUS (Pmode, pic, off);
9885 off = gen_const_mem (Pmode, off);
9886 set_mem_alias_set (off, ix86_GOT_alias_set ());
9888 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9891 off = force_reg (Pmode, off);
9892 return gen_rtx_PLUS (Pmode, base, off);
9896 base = get_thread_pointer (true);
9897 dest = gen_reg_rtx (Pmode);
9898 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: the offset is a link-time constant (@NTPOFF/@TPOFF);
   add to -- or on legacy 32-bit subtract from -- the thread pointer.  */
9902 case TLS_MODEL_LOCAL_EXEC:
9903 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9904 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9905 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9906 off = gen_rtx_CONST (Pmode, off);
9908 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9910 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9911 return gen_rtx_PLUS (Pmode, base, off);
9915 base = get_thread_pointer (true);
9916 dest = gen_reg_rtx (Pmode);
9917 emit_insn (gen_subsi3 (dest, base, off));
9928 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Garbage-collected hash table mapping decls to their __imp_* proxy
   decls; populated lazily by get_dllimport_decl below.  */
9931 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9932 htab_t dllimport_map;
/* Return the cached (or newly created) artificial VAR_DECL standing for
   the "__imp_<name>" import-table slot of DECL.  NOTE(review): this
   extract is elided; the early "if (h) return h->to;" style cache hit
   and the final return are not visible.  */
9935 get_dllimport_decl (tree decl)
9937 struct tree_map *h, in;
9941 size_t namelen, prefixlen;
/* Create the map on first use.  */
9947 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9949 in.hash = htab_hash_pointer (decl);
9950 in.base.from = decl;
9951 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9952 h = (struct tree_map *) *loc;
/* Cache miss: build the proxy decl.  */
9956 *loc = h = GGC_NEW (struct tree_map);
9958 h->base.from = decl;
9959 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9960 DECL_ARTIFICIAL (to) = 1;
9961 DECL_IGNORED_P (to) = 1;
9962 DECL_EXTERNAL (to) = 1;
9963 TREE_READONLY (to) = 1;
/* Compose "*__imp_<name>" (single underscore when the target adds no
   user label prefix or the name is fastcall-prefixed).  */
9965 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9966 name = targetm.strip_name_encoding (name);
9967 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9968 ? "*__imp_" : "*__imp__";
9969 namelen = strlen (name);
9970 prefixlen = strlen (prefix);
9971 imp_name = (char *) alloca (namelen + prefixlen + 1);
9972 memcpy (imp_name, prefix, prefixlen);
9973 memcpy (imp_name + prefixlen, name, namelen + 1);
/* DECL_RTL of the proxy is a read-only MEM of the symbol, sharing the
   GOT alias set.  */
9975 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9976 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9977 SET_SYMBOL_REF_DECL (rtl, to);
9978 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9980 rtl = gen_const_mem (Pmode, rtl);
9981 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9983 SET_DECL_RTL (to, rtl);
9984 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9989 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9990 true if we require the result be a register. */
/* Replace SYMBOL with a load from its __imp_* import slot; force the
   result into a register when WANT_REG (guard elided from this view).  */
9993 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9998 gcc_assert (SYMBOL_REF_DECL (symbol));
9999 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10001 x = DECL_RTL (imp_decl);
10003 x = force_reg (Pmode, x);
10007 /* Try machine-dependent ways of modifying an illegitimate address
10008 to be legitimate. If we find one, return the new, valid address.
10009 This macro is used in only one place: `memory_address' in explow.c.
10011 OLDX is the address as it was before break_out_memory_refs was called.
10012 In some cases it is useful to look at this to decide what needs to be done.
10014 MODE and WIN are passed so that this macro can use
10015 GO_IF_LEGITIMATE_ADDRESS.
10017 It is always safe for this macro to do nothing. It exists to recognize
10018 opportunities to optimize the output.
10020 For the 80386, we handle X+REG by loading X into a register R and
10021 using R+REG. R will go in a general reg and indexing will be used.
10022 However, if REG is a broken-out memory address or multiplication,
10023 nothing needs to be done because REG can certainly go in a general reg.
10025 When -fpic is used, special handling is needed for symbolic references.
10026 See comments by legitimize_pic_address in i386.c for details. */
/* Target hook: try to rewrite illegitimate address X for MODE (see the
   block comment above).  NOTE(review): this extract is elided -- local
   declarations ("changed", "log"), several braces, gotos and the final
   return are not visible; comments below cover visible transforms only.  */
10029 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10030 enum machine_mode mode)
/* TLS symbols (bare or sym+const inside CONST) go through the TLS
   expander.  */
10035 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10037 return legitimize_tls_address (x, (enum tls_model) log, false);
10038 if (GET_CODE (x) == CONST
10039 && GET_CODE (XEXP (x, 0)) == PLUS
10040 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10041 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10043 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10044 (enum tls_model) log, false);
10045 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols are routed through their __imp_* slot.  */
10048 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10050 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10051 return legitimize_dllimport_symbol (x, true);
10052 if (GET_CODE (x) == CONST
10053 && GET_CODE (XEXP (x, 0)) == PLUS
10054 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10055 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10057 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10058 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10062 if (flag_pic && SYMBOLIC_CONST (x))
10063 return legitimize_pic_address (x, 0);
10065 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10066 if (GET_CODE (x) == ASHIFT
10067 && CONST_INT_P (XEXP (x, 1))
10068 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10071 log = INTVAL (XEXP (x, 1));
10072 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10073 GEN_INT (1 << log));
10076 if (GET_CODE (x) == PLUS)
10078 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10080 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10081 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10082 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10085 log = INTVAL (XEXP (XEXP (x, 0), 1));
10086 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10087 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10088 GEN_INT (1 << log));
10091 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10092 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10093 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10096 log = INTVAL (XEXP (XEXP (x, 1), 1));
10097 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10098 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10099 GEN_INT (1 << log));
10102 /* Put multiply first if it isn't already. */
10103 if (GET_CODE (XEXP (x, 1)) == MULT)
10105 rtx tmp = XEXP (x, 0);
10106 XEXP (x, 0) = XEXP (x, 1);
10111 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10112 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10113 created by virtual register instantiation, register elimination, and
10114 similar optimizations. */
10115 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10118 x = gen_rtx_PLUS (Pmode,
10119 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10120 XEXP (XEXP (x, 1), 0)),
10121 XEXP (XEXP (x, 1), 1));
10125 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10126 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10127 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10128 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10129 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10130 && CONSTANT_P (XEXP (x, 1)))
10133 rtx other = NULL_RTX;
/* Pick whichever of the two constants is the CONST_INT; the other rtx
   becomes the non-constant remainder.  */
10135 if (CONST_INT_P (XEXP (x, 1)))
10137 constant = XEXP (x, 1);
10138 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10140 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10142 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10143 other = XEXP (x, 1);
10151 x = gen_rtx_PLUS (Pmode,
10152 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10153 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10154 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, accept as soon as the address is
   legitimate (non-strict check).  */
10158 if (changed && legitimate_address_p (mode, x, FALSE))
10161 if (GET_CODE (XEXP (x, 0)) == MULT)
10164 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10167 if (GET_CODE (XEXP (x, 1)) == MULT)
10170 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10174 && REG_P (XEXP (x, 1))
10175 && REG_P (XEXP (x, 0)))
10178 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10181 x = legitimize_pic_address (x, 0);
10184 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register operand of the PLUS through a
   temporary so the address becomes reg+reg.  */
10187 if (REG_P (XEXP (x, 0)))
10189 rtx temp = gen_reg_rtx (Pmode);
10190 rtx val = force_operand (XEXP (x, 1), temp);
10192 emit_move_insn (temp, val);
10194 XEXP (x, 1) = temp;
10198 else if (REG_P (XEXP (x, 1)))
10200 rtx temp = gen_reg_rtx (Pmode);
10201 rtx val = force_operand (XEXP (x, 0), temp);
10203 emit_move_insn (temp, val);
10205 XEXP (x, 0) = temp;
10213 /* Print an integer constant expression in assembler syntax. Addition
10214 and subtraction are the only arithmetic that may appear in these
10215 expressions. FILE is the stdio stream to write to, X is the rtx, and
10216 CODE is the operand print code from the output string. */
/* Emit X (a PIC-flavored constant expression) in assembler syntax to
   FILE; CODE is the operand print code (see the block comment above).
   NOTE(review): this extract is elided -- several case labels (PC,
   SYMBOL_REF, LABEL_REF, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS,
   UNSPEC by position), braces and breaks are not visible.  */
10219 output_pic_addr_const (FILE *file, rtx x, int code)
10223 switch (GET_CODE (x))
10226 gcc_assert (flag_pic);
/* SYMBOL_REF: assemble the (possibly Mach-O stub-indirected) name and
   append @PLT for non-local calls when CODE == 'P'.  */
10231 if (! TARGET_MACHO || TARGET_64BIT)
10232 output_addr_const (file, x);
10235 const char *name = XSTR (x, 0);
10237 /* Mark the decl as referenced so that cgraph will
10238 output the function. */
10239 if (SYMBOL_REF_DECL (x))
10240 mark_decl_referenced (SYMBOL_REF_DECL (x));
10243 if (MACHOPIC_INDIRECT
10244 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10245 name = machopic_indirection_name (x, /*stub_p=*/true);
10247 assemble_name (file, name);
10249 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10250 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10251 fputs ("@PLT", file);
/* Code labels print via the internal "L" label scheme.  */
10258 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10259 assemble_name (asm_out_file, buf);
10263 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10267 /* This used to output parentheses around the expression,
10268 but that does not work on the 386 (either ATT or BSD assembler). */
10269 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE: only integral (VOIDmode) values are printable here.  */
10273 if (GET_MODE (x) == VOIDmode)
10275 /* We can use %d if the number is <32 bits and positive. */
10276 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10277 fprintf (file, "0x%lx%08lx",
10278 (unsigned long) CONST_DOUBLE_HIGH (x),
10279 (unsigned long) CONST_DOUBLE_LOW (x));
10281 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10284 /* We can't handle floating point constants;
10285 PRINT_OPERAND must handle them. */
10286 output_operand_lossage ("floating constant misused");
/* PLUS/MINUS: recurse on both operands; the "+"/"-" separators between
   the two recursive calls are elided from this view.  */
10290 /* Some assemblers need integer constants to appear first. */
10291 if (CONST_INT_P (XEXP (x, 0)))
10293 output_pic_addr_const (file, XEXP (x, 0), code);
10295 output_pic_addr_const (file, XEXP (x, 1), code);
10299 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10300 output_pic_addr_const (file, XEXP (x, 1), code);
10302 output_pic_addr_const (file, XEXP (x, 0), code);
10308 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10309 output_pic_addr_const (file, XEXP (x, 0), code);
10311 output_pic_addr_const (file, XEXP (x, 1), code);
10313 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand then the relocation suffix selected
   by the unspec number.  */
10317 gcc_assert (XVECLEN (x, 0) == 1);
10318 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10319 switch (XINT (x, 1))
10322 fputs ("@GOT", file);
10324 case UNSPEC_GOTOFF:
10325 fputs ("@GOTOFF", file);
10327 case UNSPEC_PLTOFF:
10328 fputs ("@PLTOFF", file);
10330 case UNSPEC_GOTPCREL:
10331 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10332 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10334 case UNSPEC_GOTTPOFF:
10335 /* FIXME: This might be @TPOFF in Sun ld too. */
10336 fputs ("@GOTTPOFF", file);
10339 fputs ("@TPOFF", file);
/* NOTE(review): the guard distinguishing the two NTPOFF spellings
   (presumably TARGET_64BIT) is elided here and below.  */
10341 case UNSPEC_NTPOFF:
10343 fputs ("@TPOFF", file);
10345 fputs ("@NTPOFF", file);
10347 case UNSPEC_DTPOFF:
10348 fputs ("@DTPOFF", file);
10350 case UNSPEC_GOTNTPOFF:
10352 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10353 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10355 fputs ("@GOTNTPOFF", file);
10357 case UNSPEC_INDNTPOFF:
10358 fputs ("@INDNTPOFF", file);
10361 case UNSPEC_MACHOPIC_OFFSET:
10363 machopic_output_function_base_name (file);
10367 output_operand_lossage ("invalid UNSPEC as operand");
10373 output_operand_lossage ("invalid expression as operand");
10377 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10378 We need to emit DTP-relative relocations. */
/* Emit a SIZE-byte DTP-relative relocation for X into the DWARF section.
   NOTE(review): the switch on SIZE is elided; only the 4-byte-ish body
   (".long sym@DTPOFF"), the 8-byte ", 0" padding tail and the default
   gcc_unreachable are visible.  */
10380 static void ATTRIBUTE_UNUSED
10381 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10383 fputs (ASM_LONG, file);
10384 output_addr_const (file, x);
10385 fputs ("@DTPOFF", file);
10391 fputs (", 0", file);
10394 gcc_unreachable ();
10398 /* Return true if X is a representation of the PIC register. This copes
10399 with calls from ix86_find_base_term, where the register might have
10400 been replaced by a cselib value. */
/* True when X denotes the PIC register, including the cselib VALUE case
   reached from ix86_find_base_term (see the comment above).  */
10403 ix86_pic_register_p (rtx x)
10405 if (GET_CODE (x) == VALUE)
10406 return (pic_offset_table_rtx
10407 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10409 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10412 /* In the name of slightly smaller debug output, and to cater to
10413 general assembler lossage, recognize PIC+GOTOFF and turn it back
10414 into a direct symbol reference.
10416 On Darwin, this is necessary to avoid a crash, because Darwin
10417 has a different PIC label for each routine but the DWARF debugging
10418 information is not associated with any particular routine, so it's
10419 necessary to remove references to the PIC label from RTL stored by
10420 the DWARF output code. */
/* Undo PIC legitimization: turn PIC+GOT/GOTOFF forms back into direct
   symbol references (see the block comment above).  NOTE(review): this
   extract is elided -- the initial "x = orig_x" unwrapping, the
   TARGET_64BIT guard before the GOTPCREL case, and several returns of
   orig_x are not visible.  */
10423 ix86_delegitimize_address (rtx orig_x)
10426 /* reg_addend is NULL or a multiple of some register. */
10427 rtx reg_addend = NULL_RTX;
10428 /* const_addend is NULL or a const_int. */
10429 rtx const_addend = NULL_RTX;
10430 /* This is the result, or NULL. */
10431 rtx result = NULL_RTX;
/* 64-bit shape: a MEM of CONST (UNSPEC_GOTPCREL [sym]) delegitimizes to
   the bare symbol.  */
10438 if (GET_CODE (x) != CONST
10439 || GET_CODE (XEXP (x, 0)) != UNSPEC
10440 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10441 || !MEM_P (orig_x))
10443 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit shape: require PLUS (something, CONST) where "something" is the
   PIC register, possibly combined with a scaled index.  */
10446 if (GET_CODE (x) != PLUS
10447 || GET_CODE (XEXP (x, 1)) != CONST)
10450 if (ix86_pic_register_p (XEXP (x, 0)))
10451 /* %ebx + GOT/GOTOFF */
10453 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10455 /* %ebx + %reg * scale + GOT/GOTOFF */
10456 reg_addend = XEXP (x, 0);
10457 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10458 reg_addend = XEXP (reg_addend, 1);
10459 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10460 reg_addend = XEXP (reg_addend, 0);
10463 if (!REG_P (reg_addend)
10464 && GET_CODE (reg_addend) != MULT
10465 && GET_CODE (reg_addend) != ASHIFT)
/* Peel an optional constant offset off the CONST, then match the
   GOT/GOTOFF unspec (GOT only for real MEMs, GOTOFF only for non-MEMs).  */
10471 x = XEXP (XEXP (x, 1), 0);
10472 if (GET_CODE (x) == PLUS
10473 && CONST_INT_P (XEXP (x, 1)))
10475 const_addend = XEXP (x, 1);
10479 if (GET_CODE (x) == UNSPEC
10480 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10481 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10482 result = XVECEXP (x, 0, 0);
10484 if (TARGET_MACHO && darwin_local_data_pic (x)
10485 && !MEM_P (orig_x))
10486 result = XVECEXP (x, 0, 0);
/* Reattach any stripped constant and register addends.  */
10492 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10494 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10498 /* If X is a machine specific address (i.e. a symbol or label being
10499 referenced as a displacement from the GOT implemented using an
10500 UNSPEC), then return the base term. Otherwise return X. */
/* Return the base term of address X: unwrap a 64-bit
   CONST (UNSPEC_GOTPCREL [sym] (+ const)?) to the symbol, otherwise
   delegitimize.  NOTE(review): the TARGET_64BIT guard around the first
   branch is elided from this extract.  */
10503 ix86_find_base_term (rtx x)
10509 if (GET_CODE (x) != CONST)
10511 term = XEXP (x, 0);
10512 if (GET_CODE (term) == PLUS
10513 && (CONST_INT_P (XEXP (term, 1))
10514 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10515 term = XEXP (term, 0);
10516 if (GET_CODE (term) != UNSPEC
10517 || XINT (term, 1) != UNSPEC_GOTPCREL)
10520 return XVECEXP (term, 0, 0);
10523 return ix86_delegitimize_address (x);
/* Write the condition-code suffix for CODE under CC mode MODE to FILE.
   REVERSE inverts the condition; FP selects the fcmov-style spellings.
   NOTE(review): this extract is elided -- the switch head and most case
   labels (EQ/NE/GT/LT/GE/LE etc.) are not visible, so each suffix below
   can only be matched to its case approximately.  */
10527 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10528 int fp, FILE *file)
10530 const char *suffix;
/* FP compares are first mapped to the equivalent integer condition.  */
10532 if (mode == CCFPmode || mode == CCFPUmode)
10534 enum rtx_code second_code, bypass_code;
10535 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10536 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10537 code = ix86_fp_compare_code_to_integer (code);
10541 code = reverse_condition (code);
10592 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10596 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10597 Those same assemblers have the same but opposite lossage on cmov. */
10598 if (mode == CCmode)
10599 suffix = fp ? "nbe" : "a";
10600 else if (mode == CCCmode)
10603 gcc_unreachable ();
10619 gcc_unreachable ();
10623 gcc_assert (mode == CCmode || mode == CCCmode);
10640 gcc_unreachable ();
10644 /* ??? As above. */
10645 gcc_assert (mode == CCmode || mode == CCCmode);
10646 suffix = fp ? "nb" : "ae";
10649 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10653 /* ??? As above. */
10654 if (mode == CCmode)
10656 else if (mode == CCCmode)
10657 suffix = fp ? "nb" : "ae";
10659 gcc_unreachable ();
/* Parity-flag conditions (UNORDERED/ORDERED, per the fp spellings).  */
10662 suffix = fp ? "u" : "p";
10665 suffix = fp ? "nu" : "np";
10668 gcc_unreachable ();
10670 fputs (suffix, file);
10673 /* Print the name of register X to FILE based on its machine mode and number.
10674 If CODE is 'w', pretend the mode is HImode.
10675 If CODE is 'b', pretend the mode is QImode.
10676 If CODE is 'k', pretend the mode is SImode.
10677 If CODE is 'q', pretend the mode is DImode.
10678 If CODE is 'x', pretend the mode is V4SFmode.
10679 If CODE is 't', pretend the mode is V8SFmode.
10680 If CODE is 'h', pretend the reg is the 'high' byte register.
10681 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10682 If CODE is 'd', duplicate the operand for AVX instruction.
10686 print_reg (rtx x, int code, FILE *file)
10689 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers (arg/frame pointer, flags, FP status/control)
   must never reach the assembler output.  */
10691 gcc_assert (x == pc_rtx
10692 || (REGNO (x) != ARG_POINTER_REGNUM
10693 && REGNO (x) != FRAME_POINTER_REGNUM
10694 && REGNO (x) != FLAGS_REG
10695 && REGNO (x) != FPSR_REG
10696 && REGNO (x) != FPCR_REG));
10698 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip"; only meaningful in 64-bit mode.  */
10703 gcc_assert (TARGET_64BIT);
10704 fputs ("rip", file);
/* Map the override letter CODE to an operand size in bytes; the
   elided branches assign the size for each letter.  */
10708 if (code == 'w' || MMX_REG_P (x))
10710 else if (code == 'b')
10712 else if (code == 'k')
10714 else if (code == 'q')
10716 else if (code == 'y')
10718 else if (code == 'h')
10720 else if (code == 'x')
10722 else if (code == 't')
10725 code = GET_MODE_SIZE (GET_MODE (x));
10727 /* Irritatingly, AMD extended registers use different naming convention
10728 from the normal registers. */
10729 if (REX_INT_REG_P (x))
10731 gcc_assert (TARGET_64BIT);
10735 error ("extended registers have no high halves");
10738 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10741 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10744 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10747 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10750 error ("unsupported operand size for extended register");
10760 if (STACK_TOP_P (x))
/* Non-FP registers get an 'r' (64-bit) or 'e' (32-bit) prefix when
   printed at full width.  */
10769 if (! ANY_FP_REG_P (x))
10770 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10775 reg = hi_reg_name[REGNO (x)];
10778 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10780 reg = qi_reg_name[REGNO (x)];
10783 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10785 reg = qi_high_reg_name[REGNO (x)];
10790 gcc_assert (!duplicated);
10792 fputs (hi_reg_name[REGNO (x)] + 1, file);
10797 gcc_unreachable ();
/* 'd': print the register a second time for AVX three-operand form.  */
10803 if (ASSEMBLER_DIALECT == ASM_ATT)
10804 fprintf (file, ", %%%s", reg);
10806 fprintf (file, ", %s", reg);
10810 /* Locate some local-dynamic symbol still in use by this function
10811 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS SYMBOL_REF
   found into cfun->machine->some_ld_name and stop the walk.  */
10815 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10819 if (GET_CODE (x) == SYMBOL_REF
10820 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10822 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching the result in cfun->machine->some_ld_name.  Scans
   every insn pattern; aborts if none is found (callers only ask when
   one must exist).  */
10829 static const char *
10830 get_some_local_dynamic_name (void)
10834 if (cfun->machine->some_ld_name)
10835 return cfun->machine->some_ld_name;
10837 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10839 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10840 return cfun->machine->some_ld_name;
10842 gcc_unreachable ();
10845 /* Meaning of CODE:
10846 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10847 C -- print opcode suffix for set/cmov insn.
10848 c -- like C, but print reversed condition
10849 E,e -- likewise, but for compare-and-branch fused insn.
10850 F,f -- likewise, but for floating-point.
10851 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10853 R -- print the prefix for register names.
10854 z -- print the opcode suffix for the size of the current operand.
10855 Z -- likewise, with special suffixes for x87 instructions.
10856 * -- print a star (in certain assembler syntax)
10857 A -- print an absolute memory reference.
10858 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10859 s -- print a shift double count, followed by the assemblers argument
10861 b -- print the QImode name of the register for the indicated operand.
10862 %b0 would print %al if operands[0] is reg 0.
10863 w -- likewise, print the HImode name of the register.
10864 k -- likewise, print the SImode name of the register.
10865 q -- likewise, print the DImode name of the register.
10866 x -- likewise, print the V4SFmode name of the register.
10867 t -- likewise, print the V8SFmode name of the register.
10868 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10869 y -- print "st(0)" instead of "st" as a register.
10870 d -- print duplicated register operand for AVX instruction.
10871 D -- print condition for SSE cmp instruction.
10872 P -- if PIC, print an @PLT suffix.
10873 X -- don't print any sort of PIC '@' suffix for a symbol.
10874 & -- print some in-use local-dynamic symbol name.
10875 H -- print a memory address offset by 8; used for sse high-parts
10876 Y -- print condition for SSE5 com* instruction.
10877 + -- print a branch hint as 'cs' or 'ds' prefix
10878 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Master operand printer for the PRINT_OPERAND target macro: emit
   operand X to FILE, formatted according to the letter CODE documented
   above.  The dispatch switch over CODE is partially elided in this
   view.  */
10882 print_operand (FILE *file, rtx x, int code)
10889 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': print some local-dynamic TLS symbol name in use.  */
10894 assemble_name (file, get_some_local_dynamic_name ());
10898 switch (ASSEMBLER_DIALECT)
10905 /* Intel syntax. For absolute addresses, registers should not
10906 be surrounded by braces. */
10910 PRINT_OPERAND (file, x, 0);
10917 gcc_unreachable ();
10920 PRINT_OPERAND (file, x, 0);
10925 if (ASSEMBLER_DIALECT == ASM_ATT)
10930 if (ASSEMBLER_DIALECT == ASM_ATT)
10935 if (ASSEMBLER_DIALECT == ASM_ATT)
10940 if (ASSEMBLER_DIALECT == ASM_ATT)
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10950 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': integer size suffix chosen from the operand's mode.  */
10955 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10957 /* Opcodes don't get size suffixes if using Intel opcodes. */
10958 if (ASSEMBLER_DIALECT == ASM_INTEL)
10961 switch (GET_MODE_SIZE (GET_MODE (x)))
10980 output_operand_lossage
10981 ("invalid operand size for operand code '%c'", code);
10986 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10988 (0, "non-integer operand used with operand code '%c'", code);
10992 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
10993 if (ASSEMBLER_DIALECT == ASM_INTEL)
10996 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10998 switch (GET_MODE_SIZE (GET_MODE (x)))
11001 #ifdef HAVE_AS_IX86_FILDS
11011 #ifdef HAVE_AS_IX86_FILDQ
11014 fputs ("ll", file);
11022 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11024 /* 387 opcodes don't get size suffixes
11025 if the operands are registers. */
11026 if (STACK_REG_P (x))
11029 switch (GET_MODE_SIZE (GET_MODE (x)))
11050 output_operand_lossage
11051 ("invalid operand type used with operand code '%c'", code);
11055 output_operand_lossage
11056 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count, printed only when required by the
   assembler (SHIFT_DOUBLE_OMITS_COUNT).  */
11073 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11075 PRINT_OPERAND (file, x, 0);
11076 fputs (", ", file);
11081 /* Little bit of braindamage here. The SSE compare instructions
11082 does use completely different names for the comparisons that the
11083 fp conditional moves. */
11086 switch (GET_CODE (x))
11089 fputs ("eq", file);
11092 fputs ("eq_us", file);
11095 fputs ("lt", file);
11098 fputs ("nge", file);
11101 fputs ("le", file);
11104 fputs ("ngt", file);
11107 fputs ("unord", file);
11110 fputs ("neq", file);
11113 fputs ("neq_oq", file);
11116 fputs ("ge", file);
11119 fputs ("nlt", file);
11122 fputs ("gt", file);
11125 fputs ("nle", file);
11128 fputs ("ord", file);
11131 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Non-AVX SSE compare predicate names (smaller set).  */
11137 switch (GET_CODE (x))
11141 fputs ("eq", file);
11145 fputs ("lt", file);
11149 fputs ("le", file);
11152 fputs ("unord", file);
11156 fputs ("neq", file);
11160 fputs ("nlt", file);
11164 fputs ("nle", file);
11167 fputs ("ord", file);
11170 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11176 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11177 if (ASSEMBLER_DIALECT == ASM_ATT)
11179 switch (GET_MODE (x))
11181 case HImode: putc ('w', file); break;
11183 case SFmode: putc ('l', file); break;
11185 case DFmode: putc ('q', file); break;
11186 default: gcc_unreachable ();
/* 'C': set/cmov condition suffix; operand must be a comparison.  */
11193 if (!COMPARISON_P (x))
11195 output_operand_lossage ("operand is neither a constant nor a "
11196 "condition code, invalid operand code "
11200 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11203 if (!COMPARISON_P (x))
11205 output_operand_lossage ("operand is neither a constant nor a "
11206 "condition code, invalid operand code "
11210 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11211 if (ASSEMBLER_DIALECT == ASM_ATT)
11214 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11217 /* Like above, but reverse condition */
11219 /* Check to see if argument to %c is really a constant
11220 and not a condition code which needs to be reversed. */
11221 if (!COMPARISON_P (x))
11223 output_operand_lossage ("operand is neither a constant nor a "
11224 "condition code, invalid operand "
11228 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11231 if (!COMPARISON_P (x))
11233 output_operand_lossage ("operand is neither a constant nor a "
11234 "condition code, invalid operand "
11238 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11239 if (ASSEMBLER_DIALECT == ASM_ATT)
11242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': fused compare-and-branch conditions, always in CCmode.  */
11246 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11250 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11254 /* It doesn't actually matter what mode we use here, as we're
11255 only going to use this for printing. */
11256 x = adjust_address_nv (x, DImode, 8);
/* '+': emit cs/ds branch-hint prefixes from REG_BR_PROB notes.  */
11264 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11267 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11270 int pred_val = INTVAL (XEXP (x, 0));
/* Hint only when the prediction is decisive (outside 45%-55%).  */
11272 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11273 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11275 int taken = pred_val > REG_BR_PROB_BASE / 2;
11276 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11278 /* Emit hints only in the case default branch prediction
11279 heuristics would fail. */
11280 if (taken != cputaken)
11282 /* We use 3e (DS) prefix for taken branches and
11283 2e (CS) prefix for not taken branches. */
11285 fputs ("ds ; ", file);
11287 fputs ("cs ; ", file);
/* 'Y': SSE5 com* predicate names.  */
11295 switch (GET_CODE (x))
11298 fputs ("neq", file);
11301 fputs ("eq", file);
11305 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11309 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11313 fputs ("le", file);
11317 fputs ("lt", file);
11320 fputs ("unord", file);
11323 fputs ("ord", file);
11326 fputs ("ueq", file);
11329 fputs ("nlt", file);
11332 fputs ("nle", file);
11335 fputs ("ule", file);
11338 fputs ("ult", file);
11341 fputs ("une", file);
11344 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11351 fputs (" ; ", file);
11358 output_operand_lossage ("invalid operand code '%c'", code);
/* No code letter (or fall-through): print the operand itself.  */
11363 print_reg (x, code, file);
11365 else if (MEM_P (x))
11367 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11368 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11369 && GET_MODE (x) != BLKmode)
11372 switch (GET_MODE_SIZE (GET_MODE (x)))
11374 case 1: size = "BYTE"; break;
11375 case 2: size = "WORD"; break;
11376 case 4: size = "DWORD"; break;
11377 case 8: size = "QWORD"; break;
11378 case 12: size = "XWORD"; break;
11380 if (GET_MODE (x) == XFmode)
11386 gcc_unreachable ();
11389 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11392 else if (code == 'w')
11394 else if (code == 'k')
11397 fputs (size, file);
11398 fputs (" PTR ", file);
11402 /* Avoid (%rip) for call operands. */
11403 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11404 && !CONST_INT_P (x))
11405 output_addr_const (file, x);
11406 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11407 output_operand_lossage ("invalid constraints for operand");
11409 output_address (x);
/* SFmode immediates are printed as their 32-bit bit pattern.  */
11412 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11417 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11418 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11420 if (ASSEMBLER_DIALECT == ASM_ATT)
11422 fprintf (file, "0x%08lx", (long unsigned int) l);
11425 /* These float cases don't actually occur as immediate operands. */
11426 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11430 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11431 fprintf (file, "%s", dstr);
11434 else if (GET_CODE (x) == CONST_DOUBLE
11435 && GET_MODE (x) == XFmode)
11439 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11440 fprintf (file, "%s", dstr);
11445 /* We have patterns that allow zero sets of memory, for instance.
11446 In 64-bit mode, we should probably support all 8-byte vectors,
11447 since we can in fact encode that into an immediate. */
11448 if (GET_CODE (x) == CONST_VECTOR)
11450 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11456 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11458 if (ASSEMBLER_DIALECT == ASM_ATT)
11461 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11462 || GET_CODE (x) == LABEL_REF)
11464 if (ASSEMBLER_DIALECT == ASM_ATT)
11467 fputs ("OFFSET FLAT:", file);
11470 if (CONST_INT_P (x))
11471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11473 output_pic_addr_const (file, x, code);
11475 output_addr_const (file, x);
11479 /* Print a memory operand whose address is ADDR. */
/* Implements the PRINT_OPERAND_ADDRESS target macro: decompose ADDR
   into base + index*scale + displacement (+ optional segment) and
   print it in AT&T or Intel syntax as selected by ASSEMBLER_DIALECT.
   NOTE(review): both dialect branches are partially elided here.  */
11482 print_operand_address (FILE *file, rtx addr)
11484 struct ix86_address parts;
11485 rtx base, index, disp;
11487 int ok = ix86_decompose_address (addr, &parts);
11492 index = parts.index;
11494 scale = parts.scale;
/* Emit an explicit fs:/gs: segment override when present.  */
11502 if (ASSEMBLER_DIALECT == ASM_ATT)
11504 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11507 gcc_unreachable ();
11510 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11511 if (TARGET_64BIT && !base && !index)
11515 if (GET_CODE (disp) == CONST
11516 && GET_CODE (XEXP (disp, 0)) == PLUS
11517 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11518 symbol = XEXP (XEXP (disp, 0), 0);
11520 if (GET_CODE (symbol) == LABEL_REF
11521 || (GET_CODE (symbol) == SYMBOL_REF
11522 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11525 if (!base && !index)
11527 /* Displacement only requires special attention. */
11529 if (CONST_INT_P (disp))
11531 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11532 fputs ("ds:", file);
11533 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11536 output_pic_addr_const (file, disp, 0);
11538 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11542 if (ASSEMBLER_DIALECT == ASM_ATT)
11547 output_pic_addr_const (file, disp, 0);
11548 else if (GET_CODE (disp) == LABEL_REF)
11549 output_asm_label (disp);
11551 output_addr_const (file, disp);
11556 print_reg (base, 0, file);
11560 print_reg (index, 0, file);
11562 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp]; symbol part printed first,
   the integer offset appended with explicit sign.  */
11568 rtx offset = NULL_RTX;
11572 /* Pull out the offset of a symbol; print any symbol itself. */
11573 if (GET_CODE (disp) == CONST
11574 && GET_CODE (XEXP (disp, 0)) == PLUS
11575 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11577 offset = XEXP (XEXP (disp, 0), 1);
11578 disp = gen_rtx_CONST (VOIDmode,
11579 XEXP (XEXP (disp, 0), 0));
11583 output_pic_addr_const (file, disp, 0);
11584 else if (GET_CODE (disp) == LABEL_REF)
11585 output_asm_label (disp);
11586 else if (CONST_INT_P (disp))
11589 output_addr_const (file, disp);
11595 print_reg (base, 0, file);
11598 if (INTVAL (offset) >= 0)
11600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11604 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11611 print_reg (index, 0, file);
11613 fprintf (file, "*%d", scale);
/* Handle UNSPEC address constants that output_addr_const cannot:
   print the wrapped operand followed by the matching TLS relocation
   suffix (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @INDNTPOFF, ...).
   Returns false for anything that is not a recognized UNSPEC so the
   caller can report the failure.  */
11621 output_addr_const_extra (FILE *file, rtx x)
11625 if (GET_CODE (x) != UNSPEC)
11628 op = XVECEXP (x, 0, 0);
11629 switch (XINT (x, 1))
11631 case UNSPEC_GOTTPOFF:
11632 output_addr_const (file, op);
11633 /* FIXME: This might be @TPOFF in Sun ld. */
11634 fputs ("@GOTTPOFF", file);
11637 output_addr_const (file, op);
11638 fputs ("@TPOFF", file);
11640 case UNSPEC_NTPOFF:
11641 output_addr_const (file, op);
/* 64-bit uses @TPOFF, 32-bit @NTPOFF (selection elided here).  */
11643 fputs ("@TPOFF", file);
11645 fputs ("@NTPOFF", file);
11647 case UNSPEC_DTPOFF:
11648 output_addr_const (file, op);
11649 fputs ("@DTPOFF", file);
11651 case UNSPEC_GOTNTPOFF:
11652 output_addr_const (file, op);
11654 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11655 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11657 fputs ("@GOTNTPOFF", file);
11659 case UNSPEC_INDNTPOFF:
11660 output_addr_const (file, op);
11661 fputs ("@INDNTPOFF", file);
11664 case UNSPEC_MACHOPIC_OFFSET:
11665 output_addr_const (file, op);
11667 machopic_output_function_base_name (file);
11678 /* Split one or more DImode RTL references into pairs of SImode
11679 references. The RTL can be REG, offsettable MEM, integer constant, or
11680 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11681 split and "num" is its length. lo_half and hi_half are output arrays
11682 that parallel "operands". */
11685 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11689 rtx op = operands[num];
11691 /* simplify_subreg refuse to split volatile memory addresses,
11692 but we still have to handle it. */
/* MEM case: take SImode pieces at byte offsets 0 and 4.  */
11695 lo_half[num] = adjust_address (op, SImode, 0);
11696 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: subreg split; VOIDmode constants are treated as
   DImode.  */
11700 lo_half[num] = simplify_gen_subreg (SImode, op,
11701 GET_MODE (op) == VOIDmode
11702 ? DImode : GET_MODE (op), 0);
11703 hi_half[num] = simplify_gen_subreg (SImode, op,
11704 GET_MODE (op) == VOIDmode
11705 ? DImode : GET_MODE (op), 4);
11709 /* Split one or more TImode RTL references into pairs of DImode
11710 references. The RTL can be REG, offsettable MEM, integer constant, or
11711 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11712 split and "num" is its length. lo_half and hi_half are output arrays
11713 that parallel "operands". */
11716 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11720 rtx op = operands[num];
11722 /* simplify_subreg refuse to split volatile memory addresses, but we
11723 still have to handle it. */
/* MEM case: DImode pieces at byte offsets 0 and 8.  */
11726 lo_half[num] = adjust_address (op, DImode, 0);
11727 hi_half[num] = adjust_address (op, DImode, 8);
/* Non-MEM case: split via subregs of the TImode value.  */
11731 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11732 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11737 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11738 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11739 is the expression of the binary operation. The output may either be
11740 emitted here, or returned to the caller, like all output_* functions.
11742 There is no guarantee that the operands are the same mode, as they
11743 might be within FLOAT or FLOAT_EXTEND expressions. */
11745 #ifndef SYSV386_COMPAT
11746 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11747 wants to fix the assemblers because that causes incompatibility
11748 with gcc. No-one wants to fix gcc because that causes
11749 incompatibility with assemblers... You can use the option of
11750 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11751 #define SYSV386_COMPAT 1
/* Returns the assembler template (in a static buffer) for the 387 or
   SSE scalar form of the binary FP operation.  NOTE(review): this view
   is elided; several switch arms and the opcode-prefix selection are
   not visible.  */
11755 output_387_binary_op (rtx insn, rtx *operands)
11757 static char buf[40];
11760 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11762 #ifdef ENABLE_CHECKING
11763 /* Even if we do not want to check the inputs, this documents input
11764 constraints. Which helps in understanding the following code. */
11765 if (STACK_REG_P (operands[0])
11766 && ((REG_P (operands[1])
11767 && REGNO (operands[0]) == REGNO (operands[1])
11768 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11769 || (REG_P (operands[2])
11770 && REGNO (operands[0]) == REGNO (operands[2])
11771 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11772 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11775 gcc_assert (is_sse);
/* Select the base mnemonic by operation; an integer-mode source
   selects the fi* (integer-operand) x87 forms.  */
11778 switch (GET_CODE (operands[3]))
11781 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11782 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11790 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11791 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11799 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11800 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11808 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11809 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11817 gcc_unreachable ();
/* SSE path: scalar ss/sd form, three-operand under AVX.  */
11824 strcpy (buf, ssep);
11825 if (GET_MODE (operands[0]) == SFmode)
11826 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11828 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11832 strcpy (buf, ssep + 1);
11833 if (GET_MODE (operands[0]) == SFmode)
11834 strcat (buf, "ss\t{%2, %0|%0, %2}");
11836 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand suffix/order for the stack machine.  */
11842 switch (GET_CODE (operands[3]))
11846 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11848 rtx temp = operands[2];
11849 operands[2] = operands[1];
11850 operands[1] = temp;
11853 /* know operands[0] == operands[1]. */
11855 if (MEM_P (operands[2]))
11861 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11863 if (STACK_TOP_P (operands[0]))
11864 /* How is it that we are storing to a dead operand[2]?
11865 Well, presumably operands[1] is dead too. We can't
11866 store the result to st(0) as st(0) gets popped on this
11867 instruction. Instead store to operands[2] (which I
11868 think has to be st(1)). st(1) will be popped later.
11869 gcc <= 2.8.1 didn't have this check and generated
11870 assembly code that the Unixware assembler rejected. */
11871 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11873 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11877 if (STACK_TOP_P (operands[0]))
11878 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11880 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11885 if (MEM_P (operands[1]))
11891 if (MEM_P (operands[2]))
11897 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11900 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11901 derived assemblers, confusingly reverse the direction of
11902 the operation for fsub{r} and fdiv{r} when the
11903 destination register is not st(0). The Intel assembler
11904 doesn't have this brain damage. Read !SYSV386_COMPAT to
11905 figure out what the hardware really does. */
11906 if (STACK_TOP_P (operands[0]))
11907 p = "{p\t%0, %2|rp\t%2, %0}";
11909 p = "{rp\t%2, %0|p\t%0, %2}";
11911 if (STACK_TOP_P (operands[0]))
11912 /* As above for fmul/fadd, we can't store to st(0). */
11913 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11915 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11920 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11923 if (STACK_TOP_P (operands[0]))
11924 p = "{rp\t%0, %1|p\t%1, %0}";
11926 p = "{p\t%1, %0|rp\t%0, %1}";
11928 if (STACK_TOP_P (operands[0]))
11929 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11931 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11936 if (STACK_TOP_P (operands[0]))
11938 if (STACK_TOP_P (operands[1]))
11939 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11941 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11944 else if (STACK_TOP_P (operands[1]))
11947 p = "{\t%1, %0|r\t%0, %1}";
11949 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11955 p = "{r\t%2, %0|\t%0, %2}";
11957 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11963 gcc_unreachable ();
11970 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Computes which x87 control-word mode INSN requires, for the
   mode-switching optimization.  Calls and asms force UNINITIALIZED;
   insns without a known pattern impose no requirement (ANY).  */
11973 ix86_mode_needed (int entity, rtx insn)
11975 enum attr_i387_cw mode;
11977 /* The mode UNINITIALIZED is used to store control word after a
11978 function call or ASM pattern. The mode ANY specify that function
11979 has no requirements on the control word and make no changes in the
11980 bits we are interested in. */
11983 || (NONJUMP_INSN_P (insn)
11984 && (asm_noperands (PATTERN (insn)) >= 0
11985 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11986 return I387_CW_UNINITIALIZED;
11988 if (recog_memoized (insn) < 0)
11989 return I387_CW_ANY;
/* Read the required rounding mode from the insn's i387_cw attribute;
   the per-mode dispatch below is partially elided in this view.  */
11991 mode = get_attr_i387_cw (insn);
11996 if (mode == I387_CW_TRUNC)
12001 if (mode == I387_CW_FLOOR)
12006 if (mode == I387_CW_CEIL)
12011 if (mode == I387_CW_MASK_PM)
12016 gcc_unreachable ();
12019 return I387_CW_ANY;
12022 /* Output code to initialize control word copies used by trunc?f?i and
12023 rounding patterns. CURRENT_MODE is set to current control word,
12024 while NEW_MODE is set to new control word. */
/* Reads the live control word with fnstcw into a stack slot, modifies
   the rounding-control/precision-mask bits in a scratch HImode reg for
   the requested MODE, and stores the result into the per-mode stack
   slot.  Two code sequences: or/and immediates when insv would stall
   (64-bit, partial-reg-stall targets, or size optimization), else a
   movsi_insv_1 bitfield insert.  */
12027 emit_i387_cw_initialization (int mode)
12029 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12032 enum ix86_stack_slot slot;
12034 rtx reg = gen_reg_rtx (HImode);
12036 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12037 emit_move_insn (reg, copy_rtx (stored_mode));
12039 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12040 || optimize_function_for_size_p (cfun))
12044 case I387_CW_TRUNC:
12045 /* round toward zero (truncate) */
12046 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12047 slot = SLOT_CW_TRUNC;
12050 case I387_CW_FLOOR:
12051 /* round down toward -oo */
12052 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12053 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12054 slot = SLOT_CW_FLOOR;
12058 /* round up toward +oo */
12059 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12060 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12061 slot = SLOT_CW_CEIL;
12064 case I387_CW_MASK_PM:
12065 /* mask precision exception for nearbyint() */
12066 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12067 slot = SLOT_CW_MASK_PM;
12071 gcc_unreachable ();
/* Fast path: write the 2-bit rounding-control field directly with a
   bitfield insert instead of and/or immediates.  */
12078 case I387_CW_TRUNC:
12079 /* round toward zero (truncate) */
12080 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12081 slot = SLOT_CW_TRUNC;
12084 case I387_CW_FLOOR:
12085 /* round down toward -oo */
12086 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12087 slot = SLOT_CW_FLOOR;
12091 /* round up toward +oo */
12092 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12093 slot = SLOT_CW_CEIL;
12096 case I387_CW_MASK_PM:
12097 /* mask precision exception for nearbyint() */
12098 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12099 slot = SLOT_CW_MASK_PM;
12103 gcc_unreachable ();
12107 gcc_assert (slot < MAX_386_STACK_LOCALS);
12109 new_mode = assign_386_stack_local (HImode, slot);
12110 emit_move_insn (new_mode, reg);
12113 /* Output code for INSN to convert a float to a signed int. OPERANDS
12114 are the insn operands. The output may be [HSD]Imode and the input
12115 operand may be [SDX]Fmode. */
/* FISTTP non-zero selects the SSE3 truncating fisttp instruction;
   otherwise a fist/fistp bracketed by fldcw control-word switches is
   emitted when a specific rounding mode is required.  */
12118 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12120 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12121 int dimode_p = GET_MODE (operands[0]) == DImode;
12122 int round_mode = get_attr_i387_cw (insn);
12124 /* Jump through a hoop or two for DImode, since the hardware has no
12125 non-popping instruction. We used to do this a different way, but
12126 that was somewhat fragile and broke with post-reload splitters. */
12127 if ((dimode_p || fisttp) && !stack_top_dies)
12128 output_asm_insn ("fld\t%y1", operands);
12130 gcc_assert (STACK_TOP_P (operands[1]));
12131 gcc_assert (MEM_P (operands[0]));
12132 gcc_assert (GET_MODE (operands[1]) != TFmode);
12135 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic x87 path: switch to operand 3's control word, store, then
   restore operand 2's control word.  */
12138 if (round_mode != I387_CW_ANY)
12139 output_asm_insn ("fldcw\t%3", operands);
12140 if (stack_top_dies || dimode_p)
12141 output_asm_insn ("fistp%Z0\t%0", operands);
12143 output_asm_insn ("fist%Z0\t%0", operands);
12144 if (round_mode != I387_CW_ANY)
12145 output_asm_insn ("fldcw\t%2", operands);
12151 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12152 have the values zero or one, indicates the ffreep insn's operand
12153 from the OPERANDS array. */
12155 static const char *
12156 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12158 if (TARGET_USE_FFREEP)
12159 #if HAVE_AS_IX86_FFREEP
12160 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+reg) as a
   .word directive, patching the register digit into the template.  */
12163 static char retval[] = ".word\t0xc_df";
12164 int regno = REGNO (operands[opno]);
12166 gcc_assert (FP_REGNO_P (regno));
12168 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not beneficial: a plain popping store.  */
12173 return opno ? "fstp\t%y1" : "fstp\t%y0";
12177 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12178 should be used. UNORDERED_P is true when fucom should be used. */
/* Returns the assembler template for an FP comparison: SSE comiss/
   ucomiss variants, or an x87 fcom/fucom/fcomi family instruction
   (with fnstsw when the result goes through AX), popping operands that
   die in this insn.  */
12181 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12183 int stack_top_dies;
12184 rtx cmp_op0, cmp_op1;
12185 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12189 cmp_op0 = operands[0];
12190 cmp_op1 = operands[1];
12194 cmp_op0 = operands[1];
12195 cmp_op1 = operands[2];
/* SSE path: the leading 'v' is skipped when not using AVX.  */
12200 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12201 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12202 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12203 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12205 if (GET_MODE (operands[0]) == SFmode)
12207 return &ucomiss[TARGET_AVX ? 0 : 1];
12209 return &comiss[TARGET_AVX ? 0 : 1];
12212 return &ucomisd[TARGET_AVX ? 0 : 1];
12214 return &comisd[TARGET_AVX ? 0 : 1];
12217 gcc_assert (STACK_TOP_P (cmp_op0));
12219 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero uses ftst, popping st(0) if it dies.  */
12221 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12223 if (stack_top_dies)
12225 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12226 return output_387_ffreep (operands, 1);
12229 return "ftst\n\tfnstsw\t%0";
12232 if (STACK_REG_P (cmp_op1)
12234 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12235 && REGNO (cmp_op1) != FIRST_STACK_REG)
12237 /* If both the top of the 387 stack dies, and the other operand
12238 is also a stack register that dies, then this must be a
12239 `fcompp' float compare */
12243 /* There is no double popping fcomi variant. Fortunately,
12244 eflags is immune from the fstp's cc clobbering. */
12246 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12248 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12249 return output_387_ffreep (operands, 0);
12254 return "fucompp\n\tfnstsw\t%0";
12256 return "fcompp\n\tfnstsw\t%0";
12261 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12263 static const char * const alt[16] =
12265 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12266 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12267 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12268 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12270 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12271 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12275 "fcomi\t{%y1, %0|%0, %y1}",
12276 "fcomip\t{%y1, %0|%0, %y1}",
12277 "fucomi\t{%y1, %0|%0, %y1}",
12278 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into the template table from the flags.  */
12289 mask = eflags_p << 3;
12290 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12291 mask |= unordered_p << 1;
12292 mask |= stack_top_dies;
12294 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry for local label number VALUE:
   a .long (or .quad when the case-vector mode is 64-bit) referencing
   LPREFIX<value>.  */
12303 ix86_output_addr_vec_elt (FILE *file, int value)
12305 const char *directive = ASM_LONG;
12309 directive = ASM_QUAD;
12311 gcc_assert (!TARGET_64BIT);
12314 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC-relative jump-table entry: the difference between local
   labels VALUE and REL, or a @GOTOFF/Mach-O/GOT-relative form
   depending on target configuration.  */
12318 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12320 const char *directive = ASM_LONG;
12323 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12324 directive = ASM_QUAD;
12326 gcc_assert (!TARGET_64BIT);
12328 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12329 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12330 fprintf (file, "%s%s%d-%s%d\n",
12331 directive, LPREFIX, value, LPREFIX, rel);
12332 else if (HAVE_AS_GOTOFF_IN_DATA)
12333 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12335 else if (TARGET_MACHO)
12337 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12338 machopic_output_function_base_name (file);
12339 fprintf(file, "\n");
/* Fallback: entry relative to the GOT symbol.  */
12343 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12344 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12347 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit an insn clearing DEST to zero, preferring the shorter
   xor-with-self form (with its FLAGS clobber) unless TARGET_USE_MOV0
   asks for mov when optimizing for speed.  Only valid after reload.  */
12351 ix86_expand_clear (rtx dest)
12355 /* We play register width games, which are only valid after reload. */
12356 gcc_assert (reload_completed);
12358 /* Avoid HImode and its attendant prefix byte. */
12359 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12360 dest = gen_rtx_REG (SImode, REGNO (dest));
12361 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12363 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12364 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12366 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12367 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12373 /* X is an unchanging MEM. If it is a constant pool reference, return
12374 the constant pool rtx, else NULL. */
12377 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT decoration from the address before testing whether it
   is a constant-pool SYMBOL_REF.  */
12379 x = ix86_delegitimize_address (XEXP (x, 0));
12381 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12382 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing the source first: TLS symbols, dllimport symbols,
   symbol+offset CONSTs, PIC symbolic addresses (including the Mach-O
   scheme), push operands, large 64-bit immediates, and floating-point
   constants (forced into the constant pool).  Emits the final SET.  */
12388 ix86_expand_move (enum machine_mode mode, rtx operands[])
12391 enum tls_model model;
12396 if (GET_CODE (op1) == SYMBOL_REF)
12398 model = SYMBOL_REF_TLS_MODEL (op1);
12401 op1 = legitimize_tls_address (op1, model, true);
12402 op1 = force_operand (op1, op0);
12406 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12407 && SYMBOL_REF_DLLIMPORT_P (op1))
12408 op1 = legitimize_dllimport_symbol (op1, false);
/* Handle (const (plus (symbol_ref ...) (const_int ...))): legitimize
   the symbol part, then re-add the offset.  */
12410 else if (GET_CODE (op1) == CONST
12411 && GET_CODE (XEXP (op1, 0)) == PLUS
12412 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12414 rtx addend = XEXP (XEXP (op1, 0), 1);
12415 rtx symbol = XEXP (XEXP (op1, 0), 0);
12418 model = SYMBOL_REF_TLS_MODEL (symbol);
12420 tmp = legitimize_tls_address (symbol, model, true);
12421 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12422 && SYMBOL_REF_DLLIMPORT_P (symbol))
12423 tmp = legitimize_dllimport_symbol (symbol, true);
12427 tmp = force_operand (tmp, NULL);
12428 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12429 op0, 1, OPTAB_DIRECT);
/* Under PIC, symbolic operands must be legitimized; Mach-O uses its
   own indirection scheme.  */
12435 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12437 if (TARGET_MACHO && !TARGET_64BIT)
12442 rtx temp = ((reload_in_progress
12443 || ((op0 && REG_P (op0))
12445 ? op0 : gen_reg_rtx (Pmode));
12446 op1 = machopic_indirect_data_reference (op1, temp);
12447 op1 = machopic_legitimize_pic_address (op1, mode,
12448 temp == op1 ? 0 : temp);
12450 else if (MACHOPIC_INDIRECT)
12451 op1 = machopic_indirect_data_reference (op1, 0);
12459 op1 = force_reg (Pmode, op1);
12460 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12462 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12463 op1 = legitimize_pic_address (op1, reg);
12472 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12473 || !push_operand (op0, mode))
12475 op1 = force_reg (mode, op1);
12477 if (push_operand (op0, mode)
12478 && ! general_no_elim_operand (op1, mode))
12479 op1 = copy_to_mode_reg (mode, op1);
12481 /* Force large constants in 64bit compilation into register
12482 to get them CSEed. */
12483 if (can_create_pseudo_p ()
12484 && (mode == DImode) && TARGET_64BIT
12485 && immediate_operand (op1, mode)
12486 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12487 && !register_operand (op0, mode)
12489 op1 = copy_to_mode_reg (mode, op1);
12491 if (can_create_pseudo_p ()
12492 && FLOAT_MODE_P (mode)
12493 && GET_CODE (op1) == CONST_DOUBLE)
12495 /* If we are loading a floating point constant to a register,
12496 force the value to memory now, since we'll get better code
12497 out the back end. */
12499 op1 = validize_mem (force_const_mem (mode, op1));
/* mem-to-mem would be invalid; bounce through a fresh register.  */
12500 if (!register_operand (op0, mode))
12502 rtx temp = gen_reg_rtx (mode);
12503 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12504 emit_move_insn (op0, temp);
12510 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing non-zero constants into the
   constant pool and routing under-aligned SSE memory operands through
   ix86_expand_vector_move_misalign.  */
12514 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12516 rtx op0 = operands[0], op1 = operands[1];
12517 unsigned int align = GET_MODE_ALIGNMENT (mode);
12519 /* Force constants other than zero into memory. We do not know how
12520 the instructions used to build constants modify the upper 64 bits
12521 of the register, once we have that information we may be able
12522 to handle some of them more efficiently. */
12523 if (can_create_pseudo_p ()
12524 && register_operand (op0, mode)
12525 && (CONSTANT_P (op1)
12526 || (GET_CODE (op1) == SUBREG
12527 && CONSTANT_P (SUBREG_REG (op1))))
12528 && standard_sse_constant_p (op1) <= 0)
12529 op1 = validize_mem (force_const_mem (mode, op1));
12531 /* We need to check memory alignment for SSE mode since attribute
12532 can make operands unaligned. */
12533 if (can_create_pseudo_p ()
12534 && SSE_REG_MODE_P (mode)
12535 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12536 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12540 /* ix86_expand_vector_move_misalign() does not like constants ... */
12541 if (CONSTANT_P (op1)
12542 || (GET_CODE (op1) == SUBREG
12543 && CONSTANT_P (SUBREG_REG (op1))))
12544 op1 = validize_mem (force_const_mem (mode, op1));
12546 /* ... nor both arguments in memory. */
12547 if (!register_operand (op0, mode)
12548 && !register_operand (op1, mode))
12549 op1 = force_reg (mode, op1);
12551 tmp[0] = op0; tmp[1] = op1;
12552 ix86_expand_vector_move_misalign (mode, tmp);
12556 /* Make operand1 a register if it isn't already. */
12557 if (can_create_pseudo_p ()
12558 && !register_operand (op0, mode)
12559 && !register_operand (op1, mode))
12561 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12565 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12568 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12569 straight to ix86_expand_vector_move. */
12570 /* Code generation for scalar reg-reg moves of single and double precision data:
12571 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12575 if (x86_sse_partial_reg_dependency == true)
12580 Code generation for scalar loads of double precision data:
12581 if (x86_sse_split_regs == true)
12582 movlpd mem, reg (gas syntax)
12586 Code generation for unaligned packed loads of single precision data
12587 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12588 if (x86_sse_unaligned_move_optimal)
12591 if (x86_sse_partial_reg_dependency == true)
12603 Code generation for unaligned packed loads of double precision data
12604 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12605 if (x86_sse_unaligned_move_optimal)
12608 if (x86_sse_split_regs == true)
/* Emit an unaligned vector move.  The AVX path uses vmovdqu/vmovups/
   vmovupd directly; the SSE paths choose between whole-register
   unaligned moves and half-at-a-time loadlp*/loadhp* (stores mirror
   with storelp*/storehp*) according to per-CPU tuning flags.  */
12621 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12630 switch (GET_MODE_CLASS (mode))
12632 case MODE_VECTOR_INT:
12634 switch (GET_MODE_SIZE (mode))
12637 op0 = gen_lowpart (V16QImode, op0);
12638 op1 = gen_lowpart (V16QImode, op1);
12639 emit_insn (gen_avx_movdqu (op0, op1));
12642 op0 = gen_lowpart (V32QImode, op0);
12643 op1 = gen_lowpart (V32QImode, op1);
12644 emit_insn (gen_avx_movdqu256 (op0, op1));
12647 gcc_unreachable ();
12650 case MODE_VECTOR_FLOAT:
12651 op0 = gen_lowpart (mode, op0);
12652 op1 = gen_lowpart (mode, op1);
12657 emit_insn (gen_avx_movups (op0, op1));
12660 emit_insn (gen_avx_movups256 (op0, op1));
12663 emit_insn (gen_avx_movupd (op0, op1));
12666 emit_insn (gen_avx_movupd256 (op0, op1));
12669 gcc_unreachable ();
12674 gcc_unreachable ();
/* Non-AVX SSE load path (op0 is a register, op1 is memory).  */
12682 /* If we're optimizing for size, movups is the smallest. */
12683 if (optimize_insn_for_size_p ())
12685 op0 = gen_lowpart (V4SFmode, op0);
12686 op1 = gen_lowpart (V4SFmode, op1);
12687 emit_insn (gen_sse_movups (op0, op1));
12691 /* ??? If we have typed data, then it would appear that using
12692 movdqu is the only way to get unaligned data loaded with
12694 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12696 op0 = gen_lowpart (V16QImode, op0);
12697 op1 = gen_lowpart (V16QImode, op1);
12698 emit_insn (gen_sse2_movdqu (op0, op1));
12702 if (TARGET_SSE2 && mode == V2DFmode)
12706 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12708 op0 = gen_lowpart (V2DFmode, op0);
12709 op1 = gen_lowpart (V2DFmode, op1);
12710 emit_insn (gen_sse2_movupd (op0, op1));
12714 /* When SSE registers are split into halves, we can avoid
12715 writing to the top half twice. */
12716 if (TARGET_SSE_SPLIT_REGS)
12718 emit_clobber (op0);
12723 /* ??? Not sure about the best option for the Intel chips.
12724 The following would seem to satisfy; the register is
12725 entirely cleared, breaking the dependency chain. We
12726 then store to the upper half, with a dependency depth
12727 of one. A rumor has it that Intel recommends two movsd
12728 followed by an unpacklpd, but this is unconfirmed. And
12729 given that the dependency depth of the unpacklpd would
12730 still be one, I'm not sure why this would be better. */
12731 zero = CONST0_RTX (V2DFmode);
12734 m = adjust_address (op1, DFmode, 0);
12735 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12736 m = adjust_address (op1, DFmode, 8);
12737 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12741 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12743 op0 = gen_lowpart (V4SFmode, op0);
12744 op1 = gen_lowpart (V4SFmode, op1);
12745 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the destination's old contents
   before the half-register loads below.  */
12749 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12750 emit_move_insn (op0, CONST0_RTX (mode));
12752 emit_clobber (op0);
12754 if (mode != V4SFmode)
12755 op0 = gen_lowpart (V4SFmode, op0);
12756 m = adjust_address (op1, V2SFmode, 0);
12757 emit_insn (gen_sse_loadlps (op0, op0, m));
12758 m = adjust_address (op1, V2SFmode, 8);
12759 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX SSE store path (op0 is memory).  */
12762 else if (MEM_P (op0))
12764 /* If we're optimizing for size, movups is the smallest. */
12765 if (optimize_insn_for_size_p ())
12767 op0 = gen_lowpart (V4SFmode, op0);
12768 op1 = gen_lowpart (V4SFmode, op1);
12769 emit_insn (gen_sse_movups (op0, op1));
12773 /* ??? Similar to above, only less clear because of quote
12774 typeless stores unquote. */
12775 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12776 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12778 op0 = gen_lowpart (V16QImode, op0);
12779 op1 = gen_lowpart (V16QImode, op1);
12780 emit_insn (gen_sse2_movdqu (op0, op1));
12784 if (TARGET_SSE2 && mode == V2DFmode)
12786 m = adjust_address (op0, DFmode, 0);
12787 emit_insn (gen_sse2_storelpd (m, op1));
12788 m = adjust_address (op0, DFmode, 8);
12789 emit_insn (gen_sse2_storehpd (m, op1));
12793 if (mode != V4SFmode)
12794 op1 = gen_lowpart (V4SFmode, op1);
12795 m = adjust_address (op0, V2SFmode, 0);
12796 emit_insn (gen_sse_storelps (m, op1));
12797 m = adjust_address (op0, V2SFmode, 8);
12798 emit_insn (gen_sse_storehps (m, op1));
12802 gcc_unreachable ();
12805 /* Expand a push in MODE. This is some mode for which we do not support
12806 proper push instructions, at least from the registers that we expect
12807 the value to live in. */
12810 ix86_expand_push (enum machine_mode mode, rtx x)
/* Decrement the stack pointer by the mode's size, then store X at the
   new top of stack with an ordinary move.  */
12814 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12815 GEN_INT (-GET_MODE_SIZE (mode)),
12816 stack_pointer_rtx, 1, OPTAB_DIRECT);
12817 if (tmp != stack_pointer_rtx)
12818 emit_move_insn (stack_pointer_rtx, tmp);
12820 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12822 /* When we push an operand onto stack, it has to be aligned at least
12823 at the function argument boundary. However since we don't have
12824 the argument type, we can't determine the actual argument
12826 emit_move_insn (tmp, x);
12829 /* Helper function of ix86_fixup_binary_operands to canonicalize
12830 operand order. Returns true if the operands should be swapped. */
12833 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12836 rtx dst = operands[0];
12837 rtx src1 = operands[1];
12838 rtx src2 = operands[2];
12840 /* If the operation is not commutative, we can't do anything. */
12841 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12844 /* Highest priority is that src1 should match dst. */
12845 if (rtx_equal_p (dst, src1))
12847 if (rtx_equal_p (dst, src2))
12850 /* Next highest priority is that immediate constants come second. */
12851 if (immediate_operand (src2, mode))
12853 if (immediate_operand (src1, mode))
12856 /* Lowest priority is that memory references should come second. */
12866 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12867 destination to use for the operation. If different from the true
12868 destination in operands[0], a copy operation will be required. */
12871 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12874 rtx dst = operands[0];
12875 rtx src1 = operands[1];
12876 rtx src2 = operands[2];
12878 /* Canonicalize operand order. */
12879 if (ix86_swap_binary_operands_p (code, mode, operands))
12883 /* It is invalid to swap operands of different modes. */
12884 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12891 /* Both source operands cannot be in memory. */
12892 if (MEM_P (src1) && MEM_P (src2))
12894 /* Optimization: Only read from memory once. */
12895 if (rtx_equal_p (src1, src2))
12897 src2 = force_reg (mode, src2);
12901 src2 = force_reg (mode, src2);
12904 /* If the destination is memory, and we do not have matching source
12905 operands, do things in registers. */
12906 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12907 dst = gen_reg_rtx (mode);
12909 /* Source 1 cannot be a constant. */
12910 if (CONSTANT_P (src1))
12911 src1 = force_reg (mode, src1);
12913 /* Source 1 cannot be a non-matching memory. */
12914 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12915 src1 = force_reg (mode, src1);
/* Write the (possibly fixed) sources back; DST is returned to the
   caller, which must copy to operands[0] if they differ.  */
12917 operands[1] = src1;
12918 operands[2] = src2;
12922 /* Similarly, but assume that the destination has already been
12923 set up properly. */
12926 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12927 enum machine_mode mode, rtx operands[])
/* The caller guarantees no destination copy is needed, so the fixed-up
   destination must be operands[0] itself.  */
12929 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12930 gcc_assert (dst == operands[0]);
12933 /* Attempt to expand a binary operator. Make the expansion closer to the
12934 actual machine, than just general_operand, which will allow 3 separate
12935 memory references (one output, two input) in a single insn. */
12938 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12941 rtx src1, src2, dst, op, clob;
12943 dst = ix86_fixup_binary_operands (code, mode, operands);
12944 src1 = operands[1];
12945 src2 = operands[2];
12947 /* Emit the instruction. */
12949 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12950 if (reload_in_progress)
12952 /* Reload doesn't know about the flags register, and doesn't know that
12953 it doesn't want to clobber it. We can only do this with PLUS. */
12954 gcc_assert (code == PLUS);
/* Normal case: attach the EFLAGS clobber that x86 ALU insns require.  */
12959 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12960 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12963 /* Fix up the destination if needed. */
12964 if (dst != operands[0])
12965 emit_move_insn (operands[0], dst);
12968 /* Return TRUE or FALSE depending on whether the binary operator meets the
12969 appropriate constraints. */
12972 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12975 rtx dst = operands[0];
12976 rtx src1 = operands[1];
12977 rtx src2 = operands[2];
12979 /* Both source operands cannot be in memory. */
12980 if (MEM_P (src1) && MEM_P (src2))
12983 /* Canonicalize operand order for commutative operators. */
12984 if (ix86_swap_binary_operands_p (code, mode, operands))
12991 /* If the destination is memory, we must have a matching source operand. */
12992 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12995 /* Source 1 cannot be a constant. */
12996 if (CONSTANT_P (src1))
12999 /* Source 1 cannot be a non-matching memory. */
13000 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13006 /* Attempt to expand a unary operator. Make the expansion closer to the
13007 actual machine, than just general_operand, which will allow 2 separate
13008 memory references (one output, one input) in a single insn. */
13011 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13014 int matching_memory;
13015 rtx src, dst, op, clob;
13020 /* If the destination is memory, and we do not have matching source
13021 operands, do things in registers. */
13022 matching_memory = 0;
13025 if (rtx_equal_p (dst, src))
13026 matching_memory = 1;
13028 dst = gen_reg_rtx (mode);
13031 /* When source operand is memory, destination must match. */
13032 if (MEM_P (src) && !matching_memory)
13033 src = force_reg (mode, src);
13035 /* Emit the instruction. */
13037 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src))
13038 if (reload_in_progress || code == NOT)
13040 /* Reload doesn't know about the flags register, and doesn't know that
13041 it doesn't want to clobber it. */
13042 gcc_assert (code == NOT);
/* NOT does not touch EFLAGS; every other unary op gets the clobber.  */
13047 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13048 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13051 /* Fix up the destination if needed. */
13052 if (dst != operands[0])
13053 emit_move_insn (operands[0], dst);
13056 #define LEA_SEARCH_THRESHOLD 12
13058 /* Search backward for non-agu definition of register number REGNO1
13059 or register number REGNO2 in INSN's basic block until
13060 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13061 2. Reach BB boundary, or
13062 3. Reach agu definition.
13063 Returns the distance between the non-agu definition point and INSN.
13064 If no definition point, returns -1. */
13067 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13070 basic_block bb = BLOCK_FOR_INSN (insn);
13073 enum attr_type insn_type;
/* Phase 1: walk backward within INSN's own basic block.  */
13075 if (insn != BB_HEAD (bb))
13077 rtx prev = PREV_INSN (insn);
13078 while (prev && distance < LEA_SEARCH_THRESHOLD)
13083 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13084 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13085 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13086 && (regno1 == DF_REF_REGNO (*def_rec)
13087 || regno2 == DF_REF_REGNO (*def_rec)))
13089 insn_type = get_attr_type (prev);
13090 if (insn_type != TYPE_LEA)
13094 if (prev == BB_HEAD (bb))
13096 prev = PREV_INSN (prev);
/* Phase 2: if the budget is not exhausted and the block is a simple
   self-loop, continue the scan from the end of the same block.  */
13100 if (distance < LEA_SEARCH_THRESHOLD)
13104 bool simple_loop = false;
13106 FOR_EACH_EDGE (e, ei, bb->preds)
13109 simple_loop = true;
13115 rtx prev = BB_END (bb);
13118 && distance < LEA_SEARCH_THRESHOLD)
13123 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13124 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13125 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13126 && (regno1 == DF_REF_REGNO (*def_rec)
13127 || regno2 == DF_REF_REGNO (*def_rec)))
13129 insn_type = get_attr_type (prev);
13130 if (insn_type != TYPE_LEA)
13134 prev = PREV_INSN (prev);
13142 /* get_attr_type may modify recog data. We want to make sure
13143 that recog data is valid for instruction INSN, on which
13144 distance_non_agu_define is called. INSN is unchanged here. */
13145 extract_insn_cached (insn);
13149 /* Return the distance between INSN and the next insn that uses
13150 register number REGNO0 in memory address. Return -1 if no such
13151 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13154 distance_agu_use (unsigned int regno0, rtx insn)
13156 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward within INSN's own basic block.  */
13161 if (insn != BB_END (bb))
13163 rtx next = NEXT_INSN (insn);
13164 while (next && distance < LEA_SEARCH_THRESHOLD)
13170 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13171 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13172 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13173 && regno0 == DF_REF_REGNO (*use_rec))
13175 /* Return DISTANCE if OP0 is used in memory
13176 address in NEXT. */
13180 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13181 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13182 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13183 && regno0 == DF_REF_REGNO (*def_rec))
13185 /* Return -1 if OP0 is set in NEXT. */
13189 if (next == BB_END (bb))
13191 next = NEXT_INSN (next);
/* Phase 2: if the budget remains and the block is a simple self-loop,
   continue the scan from the head of the same block.  */
13195 if (distance < LEA_SEARCH_THRESHOLD)
13199 bool simple_loop = false;
13201 FOR_EACH_EDGE (e, ei, bb->succs)
13204 simple_loop = true;
13210 rtx next = BB_HEAD (bb);
13213 && distance < LEA_SEARCH_THRESHOLD)
13219 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13220 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13221 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13222 && regno0 == DF_REF_REGNO (*use_rec))
13224 /* Return DISTANCE if OP0 is used in memory
13225 address in NEXT. */
13229 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13230 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13231 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13232 && regno0 == DF_REF_REGNO (*def_rec))
13234 /* Return -1 if OP0 is set in NEXT. */
13239 next = NEXT_INSN (next);
13247 /* Define this macro to tune LEA priority vs ADD, it take effect when
13248 there is a dilemma of choosing LEA or ADD
13249 Negative value: ADD is more preferred than LEA
13251 Positive value: LEA is more preferred than ADD*/
13252 #define IX86_LEA_PRIORITY 2
13254 /* Return true if it is ok to optimize an ADD operation to LEA
13255 operation to avoid flag register consumption. For the processors
13256 like ATOM, if the destination register of LEA holds an actual
13257 address which will be used soon, LEA is better and otherwise ADD
13261 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13262 rtx insn, rtx operands[])
13264 unsigned int regno0 = true_regnum (operands[0]);
13265 unsigned int regno1 = true_regnum (operands[1]);
13266 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size), LEA is only
   needed when the destination differs from the first source.  */
13268 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13269 return regno0 != regno1;
13271 regno2 = true_regnum (operands[2]);
13273 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13274 if (regno0 != regno1 && regno0 != regno2)
13278 int dist_define, dist_use;
13279 dist_define = distance_non_agu_define (regno1, regno2, insn);
13280 if (dist_define <= 0)
13283 /* If this insn has both backward non-agu dependence and forward
13284 agu dependence, the one with short distance take effect. */
13285 dist_use = distance_agu_use (regno0, insn);
13287 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13294 /* Return true if destination reg of SET_BODY is shift count of
13298 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13304 /* Retrieve destination of SET_BODY. */
13305 switch (GET_CODE (set_body))
13308 set_dest = SET_DEST (set_body);
13309 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of SET_BODY.  */
13313 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13314 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13322 /* Retrieve shift count of USE_BODY. */
13323 switch (GET_CODE (use_body))
13326 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: recurse over each element of USE_BODY.  */
13329 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13330 if (ix86_dep_by_shift_count_body (set_body,
13331 XVECEXP (use_body, 0, i)))
13339 && (GET_CODE (shift_rtx) == ASHIFT
13340 || GET_CODE (shift_rtx) == LSHIFTRT
13341 || GET_CODE (shift_rtx) == ASHIFTRT
13342 || GET_CODE (shift_rtx) == ROTATE
13343 || GET_CODE (shift_rtx) == ROTATERT))
13345 rtx shift_count = XEXP (shift_rtx, 1);
13347 /* Return true if shift count is dest of SET_BODY. */
13348 if (REG_P (shift_count)
13349 && true_regnum (set_dest) == true_regnum (shift_count))
13356 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: extract the PATTERNs of both insns and delegate to
   ix86_dep_by_shift_count_body.  */
13360 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13362 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13363 PATTERN (use_insn));
13366 /* Return TRUE or FALSE depending on whether the unary operator meets the
13367 appropriate constraints. */
13370 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13371 enum machine_mode mode ATTRIBUTE_UNUSED,
13372 rtx operands[2] ATTRIBUTE_UNUSED)
13374 /* If one of operands is memory, source and destination must match. */
13375 if ((MEM_P (operands[0])
13376 || MEM_P (operands[1]))
13377 && ! rtx_equal_p (operands[0], operands[1]))
13382 /* Post-reload splitter for converting an SF or DFmode value in an
13383 SSE register into an unsigned SImode. */
13386 ix86_split_convert_uns_si_sse (rtx operands[])
13388 enum machine_mode vecmode;
13389 rtx value, large, zero_or_two31, input, two31, x;
13391 large = operands[1];
13392 zero_or_two31 = operands[2];
13393 input = operands[3];
13394 two31 = operands[4];
13395 vecmode = GET_MODE (large);
13396 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13398 /* Load up the value into the low element. We must ensure that the other
13399 elements are valid floats -- zero is the easiest such value. */
13402 if (vecmode == V4SFmode)
13403 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13405 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13409 input = gen_rtx_REG (vecmode, REGNO (input));
13410 emit_move_insn (value, CONST0_RTX (vecmode));
13411 if (vecmode == V4SFmode)
13412 emit_insn (gen_sse_movss (value, value, input));
13414 emit_insn (gen_sse2_movsd (value, value, input));
/* Compute the subtract-and-flip sequence: if value >= 2**31, subtract
   2**31 before the signed conversion and flip the sign bit after.  */
13417 emit_move_insn (large, two31);
13418 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13420 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13421 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13423 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13424 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13426 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13427 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the all-ones/all-zeros compare mask up into the sign bit.  */
13429 large = gen_rtx_REG (V4SImode, REGNO (large));
13430 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13432 x = gen_rtx_REG (V4SImode, REGNO (value));
13433 if (vecmode == V4SFmode)
13434 emit_insn (gen_sse2_cvttps2dq (x, value));
13436 emit_insn (gen_sse2_cvttpd2dq (x, value));
13439 emit_insn (gen_xorv4si3 (value, value, large));
13442 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13443 Expects the 64-bit DImode to be supplied in a pair of integral
13444 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13445 -mfpmath=sse, !optimize_size only. */
13448 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13450 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13451 rtx int_xmm, fp_xmm;
13452 rtx biases, exponents;
/* Move the 64-bit integer into an XMM register by whichever path the
   tuning flags allow.  */
13455 int_xmm = gen_reg_rtx (V4SImode);
13456 if (TARGET_INTER_UNIT_MOVES)
13457 emit_insn (gen_movdi_to_sse (int_xmm, input));
13458 else if (TARGET_SSE_SPLIT_REGS)
13460 emit_clobber (int_xmm);
13461 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13465 x = gen_reg_rtx (V2DImode);
13466 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13467 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13470 x = gen_rtx_CONST_VECTOR (V4SImode,
13471 gen_rtvec (4, GEN_INT (0x43300000UL),
13472 GEN_INT (0x45300000UL),
13473 const0_rtx, const0_rtx));
13474 exponents = validize_mem (force_const_mem (V4SImode, x));
13476 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13477 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13479 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13480 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13481 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13482 (0x1.0p84 + double(fp_value_hi_xmm)).
13483 Note these exponents differ by 32. */
13485 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13487 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13488 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13489 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13490 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13491 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13492 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13493 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13494 biases = validize_mem (force_const_mem (V2DFmode, biases));
13495 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13497 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack the high half and
   add.  */
13499 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13502 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13503 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13504 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13507 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13510 /* Not used, but eases macroization of patterns. */
13512 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13513 rtx input ATTRIBUTE_UNUSED)
/* This stub exists only so the machine-description macros expand; it
   must never actually be reached.  */
13515 gcc_unreachable ();
13518 /* Convert an unsigned SImode value into a DFmode. Only currently used
13519 for SSE, but applicable anywhere. */
13522 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13524 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2**31 so it fits in signed SImode, convert with
   the signed cvtsi2sd path, then add 2**31 back in DFmode.  */
13527 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13528 NULL, 1, OPTAB_DIRECT);
13530 fp = gen_reg_rtx (DFmode);
13531 emit_insn (gen_floatsidf2 (fp, x));
13533 real_ldexp (&TWO31r, &dconst1, 31);
13534 x = const_double_from_real_value (TWO31r, DFmode);
13536 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13538 emit_move_insn (target, x);
13541 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13542 32-bit mode; otherwise we have a direct convert instruction. */
13545 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13547 REAL_VALUE_TYPE TWO32r;
13548 rtx fp_lo, fp_hi, x;
13550 fp_lo = gen_reg_rtx (DFmode);
13551 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) high_word * 2**32 + (double) (unsigned) low_word.  */
13553 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13555 real_ldexp (&TWO32r, &dconst1, 32);
13556 x = const_double_from_real_value (TWO32r, DFmode);
13557 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13559 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13561 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13564 emit_move_insn (target, x);
13567 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13568 For x86_32, -mfpmath=sse, !optimize_size only. */
13570 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13572 REAL_VALUE_TYPE ONE16r;
13573 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves, convert each with the
   signed path (both halves fit), then combine: hi * 2**16 + lo.  */
13575 real_ldexp (&ONE16r, &dconst1, 16);
13576 x = const_double_from_real_value (ONE16r, SFmode);
13577 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13578 NULL, 0, OPTAB_DIRECT);
13579 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13580 NULL, 0, OPTAB_DIRECT);
13581 fp_hi = gen_reg_rtx (SFmode);
13582 fp_lo = gen_reg_rtx (SFmode);
13583 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13584 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13585 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13587 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13589 if (!rtx_equal_p (target, fp_hi))
13590 emit_move_insn (target, fp_hi);
13593 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13594 then replicate the value for all elements of the vector
/* Returns a CONST_VECTOR in the vector mode matching MODE; when VECT
   is false the FP variants put VALUE only in element 0 and zero-fill
   the rest.  */
13598 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13605 v = gen_rtvec (4, value, value, value, value);
13606 return gen_rtx_CONST_VECTOR (V4SImode, v);
13610 v = gen_rtvec (2, value, value);
13611 return gen_rtx_CONST_VECTOR (V2DImode, v);
13615 v = gen_rtvec (4, value, value, value, value);
13617 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13618 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13619 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13623 v = gen_rtvec (2, value, value);
13625 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13626 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13629 gcc_unreachable ();
13633 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13634 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13635 for an SSE register. If VECT is true, then replicate the mask for
13636 all elements of the vector register. If INVERT is true, then create
13637 a mask excluding the sign bit. */
13640 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13642 enum machine_mode vec_mode, imode;
13643 HOST_WIDE_INT hi, lo;
13648 /* Find the sign bit, sign extended to 2*HWI. */
/* SF/SI: sign bit is bit 31.  */
13654 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13655 lo = 0x80000000, hi = lo < 0;
/* DF/DI: sign bit is bit 63; may not fit in one HOST_WIDE_INT.  */
13661 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13662 if (HOST_BITS_PER_WIDE_INT >= 64)
13663 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13665 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF and friends: no SSE vector mode; build the constant by hand.  */
13670 vec_mode = VOIDmode;
13671 if (HOST_BITS_PER_WIDE_INT >= 64)
13674 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13681 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13685 lo = ~lo, hi = ~hi;
13691 mask = immed_double_const (lo, hi, imode);
13693 vec = gen_rtvec (2, v, mask);
13694 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13695 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13702 gcc_unreachable ();
/* INVERT flips the mask so it selects everything but the sign bit.  */
13706 lo = ~lo, hi = ~hi;
13708 /* Force this value into the low part of a fp vector constant. */
13709 mask = immed_double_const (lo, hi, imode);
13710 mask = gen_lowpart (mode, mask);
13712 if (vec_mode == VOIDmode)
13713 return force_reg (mode, mask);
13715 v = ix86_build_const_vector (mode, vect, mask);
13716 return force_reg (vec_mode, v);
13719 /* Generate code for floating point ABS or NEG. */
/* Emit RTL for FP ABS or NEG (CODE) in MODE.  With SSE the operation is
   a bitwise XOR (NEG) or AND (ABS) against a sign-bit mask; otherwise a
   plain unary rtx is emitted, wrapped in a PARALLEL with a USE of the
   mask and a flags CLOBBER.
   NOTE(review): interior lines are elided in this excerpt (operand
   setup, the use_sse branches) -- confirm against the full source.  */
13722 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13725 rtx mask, set, use, clob, dst, src;
13726 bool use_sse = false;
13727 bool vector_mode = VECTOR_MODE_P (mode);
13728 enum machine_mode elt_mode = mode;
13732 elt_mode = GET_MODE_INNER (mode);
13735 else if (mode == TFmode)
13737 else if (TARGET_SSE_MATH)
13738 use_sse = SSE_FLOAT_MODE_P (mode);
13740 /* NEG and ABS performed with SSE use bitwise mask operations.
13741 Create the appropriate mask now. */
/* For ABS the mask excludes the sign bit (invert == true).  */
13743 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13752 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13753 set = gen_rtx_SET (VOIDmode, dst, set);
13758 set = gen_rtx_fmt_e (code, mode, src);
13759 set = gen_rtx_SET (VOIDmode, dst, set);
13762 use = gen_rtx_USE (VOIDmode, mask);
13763 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13764 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13765 gen_rtvec (3, set, use, clob)));
13772 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand copysign(op0, op1) into DEST.  A CONST_DOUBLE op0 is
   canonicalized to its absolute value (the sign comes from op1), widened
   to a vector constant for SF/DF, and handled by the *_const patterns;
   the variable case uses the *_var patterns with both a sign mask and
   its complement.
   NOTE(review): interior lines are elided in this excerpt -- confirm
   operand extraction and the TFmode path against the full source.  */
13775 ix86_expand_copysign (rtx operands[])
13777 enum machine_mode mode;
13778 rtx dest, op0, op1, mask, nmask;
13780 dest = operands[0];
13784 mode = GET_MODE (dest);
13786 if (GET_CODE (op0) == CONST_DOUBLE)
13788 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip any negative sign; op1 supplies the sign bit.  */
13790 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13791 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13793 if (mode == SFmode || mode == DFmode)
13795 enum machine_mode vmode;
13797 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13799 if (op0 == CONST0_RTX (mode))
13800 op0 = CONST0_RTX (vmode);
13805 if (mode == SFmode)
13806 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13807 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13809 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13811 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13814 else if (op0 != CONST0_RTX (mode))
13815 op0 = force_reg (mode, op0);
13817 mask = ix86_build_signbit_mask (mode, 0, 0);
13819 if (mode == SFmode)
13820 copysign_insn = gen_copysignsf3_const;
13821 else if (mode == DFmode)
13822 copysign_insn = gen_copysigndf3_const;
13824 copysign_insn = gen_copysigntf3_const;
13826 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable op0: need both the inverted and plain sign masks.  */
13830 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13832 nmask = ix86_build_signbit_mask (mode, 0, 1);
13833 mask = ix86_build_signbit_mask (mode, 0, 0);
13835 if (mode == SFmode)
13836 copysign_insn = gen_copysignsf3_var;
13837 else if (mode == DFmode)
13838 copysign_insn = gen_copysigndf3_var;
13840 copysign_insn = gen_copysigntf3_var;
13842 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13846 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13847 be a constant, and so has already been expanded into a vector constant. */
/* Split the constant-op0 copysign pattern: dest = (dest & mask) | op0,
   performed in the vector mode of MASK via subregs.  The IOR is skipped
   when op0 is the zero vector.
   NOTE(review): some operand-extraction lines are elided in this
   excerpt.  */
13850 ix86_split_copysign_const (rtx operands[])
13852 enum machine_mode mode, vmode;
13853 rtx dest, op0, op1, mask, x;
13855 dest = operands[0];
13858 mask = operands[3];
13860 mode = GET_MODE (dest);
13861 vmode = GET_MODE (mask);
/* Clear the sign bit: dest &= mask (mask excludes nothing visible here;
   see ix86_expand_copysign for how MASK was built).  */
13863 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13864 x = gen_rtx_AND (vmode, dest, mask);
13865 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13867 if (op0 != CONST0_RTX (vmode))
13869 x = gen_rtx_IOR (vmode, dest, op0);
13870 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13874 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13875 so we have to do two masks. */
/* Split the variable copysign pattern: two AND operations (sign bits of
   op1, non-sign bits of op0) combined with IOR, with register-allocation
   alternatives distinguished by which operands share hard registers.
   NOTE(review): interior lines are elided in this excerpt (early return
   after the op0==op1 shortcut, some braces) -- confirm against the full
   source.  */
13878 ix86_split_copysign_var (rtx operands[])
13880 enum machine_mode mode, vmode;
13881 rtx dest, scratch, op0, op1, mask, nmask, x;
13883 dest = operands[0];
13884 scratch = operands[1];
13887 nmask = operands[4];
13888 mask = operands[5];
13890 mode = GET_MODE (dest);
13891 vmode = GET_MODE (mask);
13893 if (rtx_equal_p (op0, op1))
13895 /* Shouldn't happen often (it's useless, obviously), but when it does
13896 we'd generate incorrect code if we continue below. */
13897 emit_move_insn (dest, op0);
13901 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13903 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask (extract sign bits of op1).  */
13905 x = gen_rtx_AND (vmode, scratch, mask);
13906 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~mask & op0 (strip sign bit of op0).  */
13909 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13910 x = gen_rtx_NOT (vmode, dest);
13911 x = gen_rtx_AND (vmode, x, op0);
13912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13916 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13918 x = gen_rtx_AND (vmode, scratch, mask);
13920 else /* alternative 2,4 */
13922 gcc_assert (REGNO (mask) == REGNO (scratch));
13923 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13924 x = gen_rtx_AND (vmode, scratch, op1);
13926 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13928 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13930 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13931 x = gen_rtx_AND (vmode, dest, nmask);
13933 else /* alternative 3,4 */
13935 gcc_assert (REGNO (nmask) == REGNO (dest));
13937 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13938 x = gen_rtx_AND (vmode, dest, op0);
13940 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the two halves: dest |= scratch.  */
13943 x = gen_rtx_IOR (vmode, dest, scratch);
13944 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13947 /* Return TRUE or FALSE depending on whether the first SET in INSN
13948 has source and destination with matching CC modes, and that the
13949 CC mode is at least as constrained as REQ_MODE. */
/* See comment above: check that INSN's first SET compares in a CC mode
   at least as constrained as REQ_MODE and that source/destination CC
   modes agree.
   NOTE(review): the per-set_mode switch bodies are elided in this
   excerpt.  */
13952 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13955 enum machine_mode set_mode;
13957 set = PATTERN (insn);
13958 if (GET_CODE (set) == PARALLEL)
13959 set = XVECEXP (set, 0, 0);
13960 gcc_assert (GET_CODE (set) == SET);
13961 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13963 set_mode = GET_MODE (SET_DEST (set));
/* A plain CCmode request is only satisfied by a compare against 0
   unless CCNOmode was asked for.  */
13967 if (req_mode != CCNOmode
13968 && (req_mode != CCmode
13969 || XEXP (SET_SRC (set), 1) != const0_rtx))
13973 if (req_mode == CCGCmode)
13977 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13981 if (req_mode == CCZmode)
13992 gcc_unreachable ();
13995 return (GET_MODE (SET_SRC (set)) == set_mode);
13998 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit an integer compare of OP0/OP1 into the flags register and return
   the (CODE flags 0) rtx for the eventual flags consumer.  */
14001 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14003 enum machine_mode cmpmode;
14006 cmpmode = SELECT_CC_MODE (code, op0, op1);
14007 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14009 /* This is very simple, but making the interface the same as in the
14010 FP case makes the rest of the code easier. */
14011 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14012 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14014 /* Return the test that should be put into the flags user, i.e.
14015 the bcc, scc, or cmov instruction. */
14016 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14019 /* Figure out whether to use ordered or unordered fp comparisons.
14020 Return the appropriate mode to use. */
/* Pick the CC mode for an FP comparison: unordered (non-trapping) when
   targeting IEEE FP, ordered otherwise.  CODE is currently unused.  */
14023 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14025 /* ??? In order to make all comparisons reversible, we do all comparisons
14026 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14027 all forms trapping and nontrapping comparisons, we can make inequality
14028 comparisons trapping again, since it results in better code when using
14029 FCOM based compares. */
14030 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1,
   choosing the least constrained mode whose flags suffice.
   NOTE(review): the returned mode names for most cases are on elided
   lines in this excerpt; only the case structure is visible.  */
14034 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14036 enum machine_mode mode = GET_MODE (op0);
14038 if (SCALAR_FLOAT_MODE_P (mode))
14040 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14041 return ix86_fp_compare_mode (code);
14046 /* Only zero flag is needed. */
14047 case EQ: /* ZF=0 */
14048 case NE: /* ZF!=0 */
14050 /* Codes needing carry flag. */
14051 case GEU: /* CF=0 */
14052 case LTU: /* CF=1 */
14053 /* Detect overflow checks. They need just the carry flag. */
14054 if (GET_CODE (op0) == PLUS
14055 && rtx_equal_p (op1, XEXP (op0, 0)))
14059 case GTU: /* CF=0 & ZF=0 */
14060 case LEU: /* CF=1 | ZF=1 */
14061 /* Detect overflow checks. They need just the carry flag. */
14062 if (GET_CODE (op0) == MINUS
14063 && rtx_equal_p (op1, XEXP (op0, 0)))
14067 /* Codes possibly doable only with sign flag when
14068 comparing against zero. */
14069 case GE: /* SF=OF or SF=0 */
14070 case LT: /* SF<>OF or SF=1 */
14071 if (op1 == const0_rtx)
14074 /* For other cases Carry flag is not required. */
14076 /* Codes doable only with sign flag when comparing
14077 against zero, but we miss jump instruction for it
14078 so we need to use relational tests against overflow
14079 that thus needs to be zero. */
14080 case GT: /* ZF=0 & SF=OF */
14081 case LE: /* ZF=1 | SF<>OF */
14082 if (op1 == const0_rtx)
14086 /* strcmp pattern do (use flags) and combine may ask us for proper
14091 gcc_unreachable ();
14095 /* Return the fixed registers used for condition codes. */
/* Per the comment above: reports the fixed condition-code registers via
   *P1 and *P2.  NOTE(review): the body is elided in this excerpt --
   confirm which register numbers are stored against the full source.  */
14098 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14105 /* If two condition code modes are compatible, return a condition code
14106 mode which is compatible with both. Otherwise, return
static enum machine_mode
/* See comment above: merge two CC modes into one compatible with both,
   e.g. CCGCmode/CCGOCmode pairs.  NOTE(review): the values returned on
   the merged/incompatible paths are on elided lines in this excerpt.  */
14110 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14115 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14118 if ((m1 == CCGCmode && m2 == CCGOCmode)
14119 || (m1 == CCGOCmode && m2 == CCGCmode))
14125 gcc_unreachable ();
14155 /* These are only compatible with themselves, which we already
14161 /* Split comparison code CODE into comparisons we can do using branch
14162 instructions. BYPASS_CODE is comparison code for branch that will
14163 branch around FIRST_CODE and SECOND_CODE. If some of branches
14164 is not required, set value to UNKNOWN.
14165 We never require more than two branches. */
/* See comment above: decompose FP comparison CODE into up to two branch
   comparisons plus an optional bypass branch; unused outputs are set to
   UNKNOWN.  Without IEEE FP the extra branches are dropped.  */
14168 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14169 enum rtx_code *first_code,
14170 enum rtx_code *second_code)
14172 *first_code = code;
14173 *bypass_code = UNKNOWN;
14174 *second_code = UNKNOWN;
14176 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto the flag combinations shown.  */
14186 case GT: /* GTU - CF=0 & ZF=0 */
14187 case GE: /* GEU - CF=0 */
14188 case ORDERED: /* PF=0 */
14189 case UNORDERED: /* PF=1 */
14190 case UNEQ: /* EQ - ZF=1 */
14191 case UNLT: /* LTU - CF=1 */
14192 case UNLE: /* LEU - CF=1 | ZF=1 */
14193 case LTGT: /* EQ - ZF=0 */
14195 case LT: /* LTU - CF=1 - fails on unordered */
14196 *first_code = UNLT;
14197 *bypass_code = UNORDERED;
14199 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14200 *first_code = UNLE;
14201 *bypass_code = UNORDERED;
14203 case EQ: /* EQ - ZF=1 - fails on unordered */
14204 *first_code = UNEQ;
14205 *bypass_code = UNORDERED;
14207 case NE: /* NE - ZF=0 - fails on unordered */
14208 *first_code = LTGT;
14209 *second_code = UNORDERED;
14211 case UNGE: /* GEU - CF=0 - fails on unordered */
14213 *second_code = UNORDERED;
14215 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14217 *second_code = UNORDERED;
14220 gcc_unreachable ();
/* Non-IEEE: unordered results need not be honored; single branch.  */
14222 if (!TARGET_IEEE_FP)
14224 *second_code = UNKNOWN;
14225 *bypass_code = UNKNOWN;
14229 /* Return cost of comparison done fcom + arithmetics operations on AX.
14230 All following functions do use number of instructions as a cost metrics.
14231 In future this should be tweaked to compute bytes for optimize_size and
14232 take into account performance of various instructions on various CPUs. */
/* See comment above: cost (in instructions) of an fcom + AX-arithmetic
   comparison sequence.  NOTE(review): the per-code cost switch is almost
   entirely elided in this excerpt.  */
14234 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14236 if (!TARGET_IEEE_FP)
14238 /* The cost of code output by ix86_expand_fp_compare. */
14262 gcc_unreachable ();
14266 /* Return cost of comparison done using fcomi operation.
14267 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* See comment above: cost of an fcomi-based comparison; base cost 2 plus
   one when a bypass or second branch is needed.  NOTE(review): the
   "instruction not supported" early return is on elided lines.  */
14269 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14271 enum rtx_code bypass_code, first_code, second_code;
14272 /* Return arbitrarily high cost when instruction is not supported - this
14273 prevents gcc from using it. */
14276 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14277 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14280 /* Return cost of comparison done using sahf operation.
14281 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* See comment above: cost of an fnstsw/sahf comparison; base cost 3 plus
   one for an extra branch.  Returns a prohibitive cost when SAHF is not
   available/preferred (the return value itself is on an elided line).  */
14283 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14285 enum rtx_code bypass_code, first_code, second_code;
14286 /* Return arbitrarily high cost when instruction is not preferred - this
14287 avoids gcc from using it. */
14288 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14290 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14291 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14294 /* Compute cost of the comparison done using any method.
14295 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* See comment above: minimum of the arithmetics/sahf/fcomi comparison
   costs for CODE.  NOTE(review): the min-assignments and return are on
   elided lines in this excerpt.  */
14297 ix86_fp_comparison_cost (enum rtx_code code)
14299 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14302 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14303 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14305 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14306 if (min > sahf_cost)
14308 if (min > fcomi_cost)
14313 /* Return true if we should use an FCOMI instruction for this
/* True when FCOMI is the cheapest strategy for CODE or its swapped
   form (i.e. its cost equals the overall minimum comparison cost).  */
14317 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14319 enum rtx_code swapped_code = swap_condition (code);
14321 return ((ix86_fp_comparison_cost (code)
14322 == ix86_fp_comparison_fcomi_cost (code))
14323 || (ix86_fp_comparison_cost (swapped_code)
14324 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14327 /* Swap, force into registers, or otherwise massage the two operands
14328 to a fp comparison. The operands are updated in place; the new
14329 comparison code is returned. */
static enum rtx_code
/* See comment above: massage *POP0/*POP1 for an FP compare (force into
   registers, swap operands when cheaper) and return the possibly-swapped
   comparison code.
   NOTE(review): several condition lines are elided in this excerpt
   (notably the is_sse branch and the standard_80387_constant_p == 2
   handling) -- confirm against the full source.  */
14332 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14334 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14335 rtx op0 = *pop0, op1 = *pop1;
14336 enum machine_mode op_mode = GET_MODE (op0);
14337 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14339 /* All of the unordered compare instructions only work on registers.
14340 The same is true of the fcomi compare instructions. The XFmode
14341 compare instructions require registers except when comparing
14342 against zero or when converting operand 1 from fixed point to
14346 && (fpcmp_mode == CCFPUmode
14347 || (op_mode == XFmode
14348 && ! (standard_80387_constant_p (op0) == 1
14349 || standard_80387_constant_p (op1) == 1)
14350 && GET_CODE (op1) != FLOAT)
14351 || ix86_use_fcomi_compare (code)))
14353 op0 = force_reg (op_mode, op0);
14354 op1 = force_reg (op_mode, op1);
14358 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14359 things around if they appear profitable, otherwise force op0
14360 into a register. */
14362 if (standard_80387_constant_p (op0) == 0
14364 && ! (standard_80387_constant_p (op1) == 0
14368 tmp = op0, op0 = op1, op1 = tmp;
14369 code = swap_condition (code);
14373 op0 = force_reg (op_mode, op0);
14375 if (CONSTANT_P (op1))
14377 int tmp = standard_80387_constant_p (op1);
14379 op1 = validize_mem (force_const_mem (op_mode, op1));
14383 op1 = force_reg (op_mode, op1);
14386 op1 = force_reg (op_mode, op1);
14390 /* Try to rearrange the comparison to make it cheaper. */
14391 if (ix86_fp_comparison_cost (code)
14392 > ix86_fp_comparison_cost (swap_condition (code))
14393 && (REG_P (op1) || can_create_pseudo_p ()))
14396 tmp = op0, op0 = op1, op1 = tmp;
14397 code = swap_condition (code);
14399 op0 = force_reg (op_mode, op0);
14407 /* Convert comparison codes we use to represent FP comparison to integer
14408 code that will result in proper branch. Return UNKNOWN if no such code
/* Per the comment above: map an FP comparison code to the integer
   comparison code yielding the proper branch, or UNKNOWN.
   NOTE(review): the body is entirely elided in this excerpt.  */
14412 ix86_fp_compare_code_to_integer (enum rtx_code code)
14441 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emit an FP compare of OP0/OP1.  When fcomi/sahf is profitable, set
   the flags directly and fill *SECOND_TEST / *BYPASS_TEST for callers
   that handle multi-branch sequences.  Otherwise fall back to
   fnstsw + bit-twiddling on AH, folding the unordered handling into a
   single flags test.  Returns the rtx to feed the bcc/scc/cmov.
   NOTE(review): many structural lines (braces, else branches, some
   GEN_INT operands) are elided in this excerpt -- the 0x45/0x44/0x40/
   0x05/0x01 constants select C0/C2/C3 status-word bits; confirm the
   exact per-code mapping against the full source.  */
14444 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14445 rtx *second_test, rtx *bypass_test)
14447 enum machine_mode fpcmp_mode, intcmp_mode;
14449 int cost = ix86_fp_comparison_cost (code);
14450 enum rtx_code bypass_code, first_code, second_code;
14452 fpcmp_mode = ix86_fp_compare_mode (code);
14453 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14456 *second_test = NULL_RTX;
14458 *bypass_test = NULL_RTX;
14460 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14462 /* Do fcomi/sahf based test when profitable. */
14463 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14464 && (bypass_code == UNKNOWN || bypass_test)
14465 && (second_code == UNKNOWN || second_test))
14467 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14468 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14474 gcc_assert (TARGET_SAHF);
14477 scratch = gen_reg_rtx (HImode);
14478 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14480 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14483 /* The FP codes work out to act like unsigned. */
14484 intcmp_mode = fpcmp_mode;
14486 if (bypass_code != UNKNOWN)
14487 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14488 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14490 if (second_code != UNKNOWN)
14491 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14492 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14497 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14499 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14501 scratch = gen_reg_rtx (HImode);
14502 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14504 /* In the unordered case, we have to check C2 for NaN's, which
14505 doesn't happen to work out to anything nice combination-wise.
14506 So do some bit twiddling on the value we've got in AH to come
14507 up with an appropriate set of condition codes. */
14509 intcmp_mode = CCNOmode;
14514 if (code == GT || !TARGET_IEEE_FP)
14516 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14524 intcmp_mode = CCmode;
14530 if (code == LT && TARGET_IEEE_FP)
14532 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14533 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14534 intcmp_mode = CCmode;
14539 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14545 if (code == GE || !TARGET_IEEE_FP)
14547 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14553 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14560 if (code == LE && TARGET_IEEE_FP)
14562 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14563 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14564 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14565 intcmp_mode = CCmode;
14570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14576 if (code == EQ && TARGET_IEEE_FP)
14578 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14579 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14580 intcmp_mode = CCmode;
14585 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14592 if (code == NE && TARGET_IEEE_FP)
14594 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14595 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14607 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14611 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14616 gcc_unreachable ();
14620 /* Return the test that should be put into the flags user, i.e.
14621 the bcc, scc, or cmov instruction. */
14622 return gen_rtx_fmt_ee (code, VOIDmode,
14623 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a compare of the global ix86_compare_op0/op1: pass through
   an already-computed CC-mode value, expand FP compares (which may fill
   *SECOND_TEST / *BYPASS_TEST), or fall back to integer compare.
   NOTE(review): local declarations and the final return are on elided
   lines in this excerpt.  */
14628 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14631 op0 = ix86_compare_op0;
14632 op1 = ix86_compare_op1;
14635 *second_test = NULL_RTX;
14637 *bypass_test = NULL_RTX;
14639 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14640 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14642 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14644 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14645 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14646 second_test, bypass_test);
14649 ret = ix86_expand_int_compare (code, op0, op1);
14654 /* Return true if the CODE will result in nontrivial jump sequence. */
/* See comment above: CODE needs a nontrivial (multi-branch) jump
   sequence when its decomposition requires a bypass or second branch.  */
14656 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14658 enum rtx_code bypass_code, first_code, second_code;
14661 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14662 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on ix86_compare_op0/op1 to LABEL,
   dispatching on the operand mode: simple integer modes emit one
   compare+jump; FP modes go through the fp-compare machinery (possibly
   as a compound insn); DImode (or TImode on 64-bit) is split into
   word-sized compare+branch sequences; CC-mode operands branch on the
   existing flags.
   NOTE(review): switch labels, braces and a few statements are elided
   in this excerpt -- confirm case boundaries against the full source.  */
14666 ix86_expand_branch (enum rtx_code code, rtx label)
14670 switch (GET_MODE (ix86_compare_op0))
/* Simple scalar integer modes: one compare, one conditional jump.  */
14676 tmp = ix86_expand_compare (code, NULL, NULL);
14677 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14678 gen_rtx_LABEL_REF (VOIDmode, label),
14680 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP modes.  */
14689 enum rtx_code bypass_code, first_code, second_code;
14691 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14692 &ix86_compare_op1);
14694 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14696 /* Check whether we will use the natural sequence with one jump. If
14697 so, we can expand jump early. Otherwise delay expansion by
14698 creating compound insn to not confuse optimizers. */
14699 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14701 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14702 gen_rtx_LABEL_REF (VOIDmode, label),
14703 pc_rtx, NULL_RTX, NULL_RTX);
14707 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14708 ix86_compare_op0, ix86_compare_op1);
14709 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14710 gen_rtx_LABEL_REF (VOIDmode, label),
14712 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* Compound insn: the jump plus FPSR/flags clobbers (and a scratch
   when not using fcomi), split later.  */
14714 use_fcomi = ix86_use_fcomi_compare (code);
14715 vec = rtvec_alloc (3 + !use_fcomi);
14716 RTVEC_ELT (vec, 0) = tmp;
14718 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14720 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14723 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14725 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14734 /* Expand DImode branch into multiple compare+branch. */
14736 rtx lo[2], hi[2], label2;
14737 enum rtx_code code1, code2, code3;
14738 enum machine_mode submode;
/* Canonicalize: constant operand second.  */
14740 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14742 tmp = ix86_compare_op0;
14743 ix86_compare_op0 = ix86_compare_op1;
14744 ix86_compare_op1 = tmp;
14745 code = swap_condition (code);
14747 if (GET_MODE (ix86_compare_op0) == DImode)
14749 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14750 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14755 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14756 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14760 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14761 avoid two branches. This costs one extra insn, so disable when
14762 optimizing for size. */
14764 if ((code == EQ || code == NE)
14765 && (!optimize_insn_for_size_p ()
14766 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14771 if (hi[1] != const0_rtx)
14772 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14773 NULL_RTX, 0, OPTAB_WIDEN);
14776 if (lo[1] != const0_rtx)
14777 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14778 NULL_RTX, 0, OPTAB_WIDEN);
14780 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14781 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the OR of XORs compared against zero.  */
14783 ix86_compare_op0 = tmp;
14784 ix86_compare_op1 = const0_rtx;
14785 ix86_expand_branch (code, label);
14789 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14790 op1 is a constant and the low word is zero, then we can just
14791 examine the high word. Similarly for low word -1 and
14792 less-or-equal-than or greater-than. */
14794 if (CONST_INT_P (hi[1]))
14797 case LT: case LTU: case GE: case GEU:
14798 if (lo[1] == const0_rtx)
14800 ix86_compare_op0 = hi[0];
14801 ix86_compare_op1 = hi[1];
14802 ix86_expand_branch (code, label);
14806 case LE: case LEU: case GT: case GTU:
14807 if (lo[1] == constm1_rtx)
14809 ix86_compare_op0 = hi[0];
14810 ix86_compare_op1 = hi[1];
14811 ix86_expand_branch (code, label);
14819 /* Otherwise, we need two or three jumps. */
14821 label2 = gen_label_rtx ();
14824 code2 = swap_condition (code);
14825 code3 = unsigned_condition (code);
14829 case LT: case GT: case LTU: case GTU:
14832 case LE: code1 = LT; code2 = GT; break;
14833 case GE: code1 = GT; code2 = LT; break;
14834 case LEU: code1 = LTU; code2 = GTU; break;
14835 case GEU: code1 = GTU; code2 = LTU; break;
14837 case EQ: code1 = UNKNOWN; code2 = NE; break;
14838 case NE: code2 = UNKNOWN; break;
14841 gcc_unreachable ();
14846 * if (hi(a) < hi(b)) goto true;
14847 * if (hi(a) > hi(b)) goto false;
14848 * if (lo(a) < lo(b)) goto true;
14852 ix86_compare_op0 = hi[0];
14853 ix86_compare_op1 = hi[1];
14855 if (code1 != UNKNOWN)
14856 ix86_expand_branch (code1, label);
14857 if (code2 != UNKNOWN)
14858 ix86_expand_branch (code2, label2);
14860 ix86_compare_op0 = lo[0];
14861 ix86_compare_op1 = lo[1];
14862 ix86_expand_branch (code3, label);
14864 if (code2 != UNKNOWN)
14865 emit_label (label2);
14870 /* If we have already emitted a compare insn, go straight to simple.
14871 ix86_expand_compare won't emit anything if ix86_compare_emitted
14873 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14878 /* Split branch based on floating point condition. */
/* See comment above: emit one to three conditional jumps for an FP
   comparison, with optional bypass branch (around the main test) and
   second branch, distributing split_branch_probability across them.
   NOTE(review): some lines are elided in this excerpt (e.g. the target
   swap after reverse_condition_maybe_unordered).  */
14880 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14881 rtx target1, rtx target2, rtx tmp, rtx pushed)
14883 rtx second, bypass;
14884 rtx label = NULL_RTX;
14886 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken branch goes to target1.  */
14889 if (target2 != pc_rtx)
14892 code = reverse_condition_maybe_unordered (code);
14897 condition = ix86_expand_fp_compare (code, op1, op2,
14898 tmp, &second, &bypass);
14900 /* Remove pushed operand from stack. */
14902 ix86_free_from_memory (GET_MODE (pushed));
14904 if (split_branch_probability >= 0)
14906 /* Distribute the probabilities across the jumps.
14907 Assume the BYPASS and SECOND to be always test
14909 probability = split_branch_probability;
14911 /* Value of 1 is low enough to make no need for probability
14912 to be updated. Later we may run some experiments and see
14913 if unordered values are more frequent in practice. */
14915 bypass_probability = 1;
14917 second_probability = 1;
/* Bypass branch jumps over the main conditional jump.  */
14919 if (bypass != NULL_RTX)
14921 label = gen_label_rtx ();
14922 i = emit_jump_insn (gen_rtx_SET
14924 gen_rtx_IF_THEN_ELSE (VOIDmode,
14926 gen_rtx_LABEL_REF (VOIDmode,
14929 if (bypass_probability >= 0)
14930 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
14932 i = emit_jump_insn (gen_rtx_SET
14934 gen_rtx_IF_THEN_ELSE (VOIDmode,
14935 condition, target1, target2)));
14936 if (probability >= 0)
14937 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14938 if (second != NULL_RTX)
14940 i = emit_jump_insn (gen_rtx_SET
14942 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14944 if (second_probability >= 0)
14945 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14947 if (label != NULL_RTX)
14948 emit_label (label);
/* Expand a setcc into QImode DEST: emit the compare, store the flag
   result, and combine any second/bypass test with AND/OR.  Returns 1 on
   success, 0 (FAIL) for double-word modes handled elsewhere.
   NOTE(review): some lines (tmpreg setup, the bypass AND/OR selection)
   are elided in this excerpt.  */
14952 ix86_expand_setcc (enum rtx_code code, rtx dest)
14954 rtx ret, tmp, tmpreg, equiv;
14955 rtx second_test, bypass_test;
14957 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14958 return 0; /* FAIL */
14960 gcc_assert (GET_MODE (dest) == QImode);
14962 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14963 PUT_MODE (ret, QImode);
14968 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14969 if (bypass_test || second_test)
14971 rtx test = second_test;
14973 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is combined inverted (it names the case to skip).  */
14976 gcc_assert (!second_test);
14977 test = bypass_test;
14979 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14981 PUT_MODE (test, QImode);
14982 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14985 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14987 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14990 /* Attach a REG_EQUAL note describing the comparison result. */
14991 if (ix86_compare_op0 && ix86_compare_op1)
14993 equiv = simplify_gen_relational (code, QImode,
14994 GET_MODE (ix86_compare_op0),
14995 ix86_compare_op0, ix86_compare_op1);
14996 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14999 return 1; /* DONE */
15002 /* Expand comparison setting or clearing carry flag. Return true when
15003 successful and set pop for the operation. */
/* See comment above: try to expand comparison CODE of OP0/OP1 as a
   carry-flag (LTU/GEU) test, rewriting the code and operands (e.g.
   a==0 -> (unsigned)a<1, a>b -> b<a or a>=b-1, a>=0 -> (unsigned)a <
   0x80000000) and storing the resulting flags test in *POP.
   NOTE(review): switch labels, early returns and operand swaps are on
   elided lines in this excerpt -- confirm against the full source.  */
15005 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15007 enum machine_mode mode =
15008 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15010 /* Do not handle DImode compares that go through special path. */
15011 if (mode == (TARGET_64BIT ? TImode : DImode))
15014 if (SCALAR_FLOAT_MODE_P (mode))
15016 rtx second_test = NULL, bypass_test = NULL;
15017 rtx compare_op, compare_seq;
15019 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15021 /* Shortcut: following common codes never translate
15022 into carry flag compares. */
15023 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15024 || code == ORDERED || code == UNORDERED)
15027 /* These comparisons require zero flag; swap operands so they won't. */
15028 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15029 && !TARGET_IEEE_FP)
15034 code = swap_condition (code);
15037 /* Try to expand the comparison and verify that we end up with
15038 carry flag based comparison. This fails to be true only when
15039 we decide to expand comparison using arithmetic that is not
15040 too common scenario. */
15042 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15043 &second_test, &bypass_test);
15044 compare_seq = get_insns ();
15047 if (second_test || bypass_test)
15050 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15051 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15052 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15054 code = GET_CODE (compare_op);
15056 if (code != LTU && code != GEU)
15059 emit_insn (compare_seq);
15064 if (!INTEGRAL_MODE_P (mode))
15073 /* Convert a==0 into (unsigned)a<1. */
15076 if (op1 != const0_rtx)
15079 code = (code == EQ ? LTU : GEU);
15082 /* Convert a>b into b<a or a>=b-1. */
15085 if (CONST_INT_P (op1))
15087 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15088 /* Bail out on overflow. We still can swap operands but that
15089 would force loading of the constant into register. */
15090 if (op1 == const0_rtx
15091 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15093 code = (code == GTU ? GEU : LTU);
15100 code = (code == GTU ? LTU : GEU);
15104 /* Convert a>=0 into (unsigned)a<0x80000000. */
15107 if (mode == DImode || op1 != const0_rtx)
15109 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15110 code = (code == LT ? GEU : LTU);
15114 if (mode == DImode || op1 != constm1_rtx)
15116 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15117 code = (code == LE ? GEU : LTU);
15123 /* Swapping operands may cause constant to appear as first operand. */
15124 if (!nonimmediate_operand (op0, VOIDmode))
15126 if (!can_create_pseudo_p ())
15128 op0 = force_reg (mode, op0);
15130 ix86_compare_op0 = op0;
15131 ix86_compare_op1 = op1;
15132 *pop = ix86_expand_compare (code, NULL, NULL);
15133 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15138 ix86_expand_int_movcc (rtx operands[])
15140 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15141 rtx compare_seq, compare_op;
15142 rtx second_test, bypass_test;
15143 enum machine_mode mode = GET_MODE (operands[0]);
15144 bool sign_bit_compare_p = false;;
15147 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15148 compare_seq = get_insns ();
15151 compare_code = GET_CODE (compare_op);
15153 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15154 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15155 sign_bit_compare_p = true;
15157 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15158 HImode insns, we'd be swallowed in word prefix ops. */
15160 if ((mode != HImode || TARGET_FAST_PREFIX)
15161 && (mode != (TARGET_64BIT ? TImode : DImode))
15162 && CONST_INT_P (operands[2])
15163 && CONST_INT_P (operands[3]))
15165 rtx out = operands[0];
15166 HOST_WIDE_INT ct = INTVAL (operands[2]);
15167 HOST_WIDE_INT cf = INTVAL (operands[3]);
15168 HOST_WIDE_INT diff;
15171 /* Sign bit compares are better done using shifts than we do by using
15173 if (sign_bit_compare_p
15174 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15175 ix86_compare_op1, &compare_op))
15177 /* Detect overlap between destination and compare sources. */
15180 if (!sign_bit_compare_p)
15182 bool fpcmp = false;
15184 compare_code = GET_CODE (compare_op);
15186 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15187 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15190 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15193 /* To simplify rest of code, restrict to the GEU case. */
15194 if (compare_code == LTU)
15196 HOST_WIDE_INT tmp = ct;
15199 compare_code = reverse_condition (compare_code);
15200 code = reverse_condition (code);
15205 PUT_CODE (compare_op,
15206 reverse_condition_maybe_unordered
15207 (GET_CODE (compare_op)));
15209 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15213 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15214 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15215 tmp = gen_reg_rtx (mode);
15217 if (mode == DImode)
15218 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15220 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15224 if (code == GT || code == GE)
15225 code = reverse_condition (code);
15228 HOST_WIDE_INT tmp = ct;
15233 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15234 ix86_compare_op1, VOIDmode, 0, -1);
15247 tmp = expand_simple_binop (mode, PLUS,
15249 copy_rtx (tmp), 1, OPTAB_DIRECT);
15260 tmp = expand_simple_binop (mode, IOR,
15262 copy_rtx (tmp), 1, OPTAB_DIRECT);
15264 else if (diff == -1 && ct)
15274 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15276 tmp = expand_simple_binop (mode, PLUS,
15277 copy_rtx (tmp), GEN_INT (cf),
15278 copy_rtx (tmp), 1, OPTAB_DIRECT);
15286 * andl cf - ct, dest
15296 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15299 tmp = expand_simple_binop (mode, AND,
15301 gen_int_mode (cf - ct, mode),
15302 copy_rtx (tmp), 1, OPTAB_DIRECT);
15304 tmp = expand_simple_binop (mode, PLUS,
15305 copy_rtx (tmp), GEN_INT (ct),
15306 copy_rtx (tmp), 1, OPTAB_DIRECT);
15309 if (!rtx_equal_p (tmp, out))
15310 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15312 return 1; /* DONE */
15317 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15320 tmp = ct, ct = cf, cf = tmp;
15323 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15325 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15327 /* We may be reversing unordered compare to normal compare, that
15328 is not valid in general (we may convert non-trapping condition
15329 to trapping one), however on i386 we currently emit all
15330 comparisons unordered. */
15331 compare_code = reverse_condition_maybe_unordered (compare_code);
15332 code = reverse_condition_maybe_unordered (code);
15336 compare_code = reverse_condition (compare_code);
15337 code = reverse_condition (code);
15341 compare_code = UNKNOWN;
15342 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15343 && CONST_INT_P (ix86_compare_op1))
15345 if (ix86_compare_op1 == const0_rtx
15346 && (code == LT || code == GE))
15347 compare_code = code;
15348 else if (ix86_compare_op1 == constm1_rtx)
15352 else if (code == GT)
15357 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15358 if (compare_code != UNKNOWN
15359 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15360 && (cf == -1 || ct == -1))
15362 /* If lea code below could be used, only optimize
15363 if it results in a 2 insn sequence. */
15365 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15366 || diff == 3 || diff == 5 || diff == 9)
15367 || (compare_code == LT && ct == -1)
15368 || (compare_code == GE && cf == -1))
15371 * notl op1 (if necessary)
15379 code = reverse_condition (code);
15382 out = emit_store_flag (out, code, ix86_compare_op0,
15383 ix86_compare_op1, VOIDmode, 0, -1);
15385 out = expand_simple_binop (mode, IOR,
15387 out, 1, OPTAB_DIRECT);
15388 if (out != operands[0])
15389 emit_move_insn (operands[0], out);
15391 return 1; /* DONE */
15396 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15397 || diff == 3 || diff == 5 || diff == 9)
15398 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15400 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15406 * lea cf(dest*(ct-cf)),dest
15410 * This also catches the degenerate setcc-only case.
15416 out = emit_store_flag (out, code, ix86_compare_op0,
15417 ix86_compare_op1, VOIDmode, 0, 1);
15420 /* On x86_64 the lea instruction operates on Pmode, so we need
15421 to get arithmetics done in proper mode to match. */
15423 tmp = copy_rtx (out);
15427 out1 = copy_rtx (out);
15428 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15432 tmp = gen_rtx_PLUS (mode, tmp, out1);
15438 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15441 if (!rtx_equal_p (tmp, out))
15444 out = force_operand (tmp, copy_rtx (out));
15446 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15448 if (!rtx_equal_p (out, operands[0]))
15449 emit_move_insn (operands[0], copy_rtx (out));
15451 return 1; /* DONE */
15455 * General case: Jumpful:
15456 * xorl dest,dest cmpl op1, op2
15457 * cmpl op1, op2 movl ct, dest
15458 * setcc dest jcc 1f
15459 * decl dest movl cf, dest
15460 * andl (cf-ct),dest 1:
15463 * Size 20. Size 14.
15465 * This is reasonably steep, but branch mispredict costs are
15466 * high on modern cpus, so consider failing only if optimizing
15470 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15471 && BRANCH_COST (optimize_insn_for_speed_p (),
15476 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15481 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15483 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15485 /* We may be reversing unordered compare to normal compare,
15486 that is not valid in general (we may convert non-trapping
15487 condition to trapping one), however on i386 we currently
15488 emit all comparisons unordered. */
15489 code = reverse_condition_maybe_unordered (code);
15493 code = reverse_condition (code);
15494 if (compare_code != UNKNOWN)
15495 compare_code = reverse_condition (compare_code);
15499 if (compare_code != UNKNOWN)
15501 /* notl op1 (if needed)
15506 For x < 0 (resp. x <= -1) there will be no notl,
15507 so if possible swap the constants to get rid of the
15509 True/false will be -1/0 while code below (store flag
15510 followed by decrement) is 0/-1, so the constants need
15511 to be exchanged once more. */
15513 if (compare_code == GE || !cf)
15515 code = reverse_condition (code);
15520 HOST_WIDE_INT tmp = cf;
15525 out = emit_store_flag (out, code, ix86_compare_op0,
15526 ix86_compare_op1, VOIDmode, 0, -1);
15530 out = emit_store_flag (out, code, ix86_compare_op0,
15531 ix86_compare_op1, VOIDmode, 0, 1);
15533 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15534 copy_rtx (out), 1, OPTAB_DIRECT);
15537 out = expand_simple_binop (mode, AND, copy_rtx (out),
15538 gen_int_mode (cf - ct, mode),
15539 copy_rtx (out), 1, OPTAB_DIRECT);
15541 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15542 copy_rtx (out), 1, OPTAB_DIRECT);
15543 if (!rtx_equal_p (out, operands[0]))
15544 emit_move_insn (operands[0], copy_rtx (out));
15546 return 1; /* DONE */
15550 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15552 /* Try a few things more with specific constants and a variable. */
15555 rtx var, orig_out, out, tmp;
15557 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15558 return 0; /* FAIL */
15560 /* If one of the two operands is an interesting constant, load a
15561 constant with the above and mask it in with a logical operation. */
15563 if (CONST_INT_P (operands[2]))
15566 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15567 operands[3] = constm1_rtx, op = and_optab;
15568 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15569 operands[3] = const0_rtx, op = ior_optab;
15571 return 0; /* FAIL */
15573 else if (CONST_INT_P (operands[3]))
15576 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15577 operands[2] = constm1_rtx, op = and_optab;
15578 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15579 operands[2] = const0_rtx, op = ior_optab;
15581 return 0; /* FAIL */
15584 return 0; /* FAIL */
15586 orig_out = operands[0];
15587 tmp = gen_reg_rtx (mode);
15590 /* Recurse to get the constant loaded. */
15591 if (ix86_expand_int_movcc (operands) == 0)
15592 return 0; /* FAIL */
15594 /* Mask in the interesting variable. */
15595 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15597 if (!rtx_equal_p (out, orig_out))
15598 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15600 return 1; /* DONE */
15604 * For comparison with above,
15614 if (! nonimmediate_operand (operands[2], mode))
15615 operands[2] = force_reg (mode, operands[2]);
15616 if (! nonimmediate_operand (operands[3], mode))
15617 operands[3] = force_reg (mode, operands[3]);
15619 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15621 rtx tmp = gen_reg_rtx (mode);
15622 emit_move_insn (tmp, operands[3]);
15625 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15627 rtx tmp = gen_reg_rtx (mode);
15628 emit_move_insn (tmp, operands[2]);
15632 if (! register_operand (operands[2], VOIDmode)
15634 || ! register_operand (operands[3], VOIDmode)))
15635 operands[2] = force_reg (mode, operands[2]);
15638 && ! register_operand (operands[3], VOIDmode))
15639 operands[3] = force_reg (mode, operands[3]);
15641 emit_insn (compare_seq);
15642 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15643 gen_rtx_IF_THEN_ELSE (mode,
15644 compare_op, operands[2],
15647 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15648 gen_rtx_IF_THEN_ELSE (mode,
15650 copy_rtx (operands[3]),
15651 copy_rtx (operands[0]))));
15653 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15654 gen_rtx_IF_THEN_ELSE (mode,
15656 copy_rtx (operands[2]),
15657 copy_rtx (operands[0]))));
15659 return 1; /* DONE */
15662 /* Swap, force into registers, or otherwise massage the two operands
15663 to an sse comparison with a mask result. Thus we differ a bit from
15664 ix86_prepare_fp_compare_args which expects to produce a flags result.
15666 The DEST operand exists to help determine whether to commute commutative
15667 operators. The POP0/POP1 operands are updated in place. The new
15668 comparison code is returned, or UNKNOWN if not implementable. */
15670 static enum rtx_code
15671 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15672 rtx *pop0, rtx *pop1)
/* NOTE(review): this listing is elided -- the switch case labels that
   select among the branches below are missing; the branch comments
   themselves are the authoritative description of each group.  */
15680 /* We have no LTGT as an operator. We could implement it with
15681 NE & ORDERED, but this requires an extra temporary. It's
15682 not clear that it's worth it. */
15689 /* These are supported directly. */
15696 /* For commutative operators, try to canonicalize the destination
15697 operand to be first in the comparison - this helps reload to
15698 avoid extra moves. */
15699 if (!dest || !rtx_equal_p (dest, *pop1))
15707 /* These are not supported directly. Swap the comparison operands
15708 to transform into something that is supported. */
15712 code = swap_condition (code);
/* Any comparison code not handled by a case above is a caller bug.  */
15716 gcc_unreachable ();
15722 /* Detect conditional moves that exactly match min/max operational
15723 semantics. Note that this is IEEE safe, as long as we don't
15724 interchange the operands.
15726 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15727 and TRUE if the operation is successful and instructions are emitted. */
15730 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15731 rtx cmp_op1, rtx if_true, rtx if_false)
15733 enum machine_mode mode;
/* UNGE is handled by swapping the two move arms (listing elided;
   only the swap's second assignment is visible below).  */
15739 else if (code == UNGE)
15742 if_true = if_false;
/* The cmov matches a min/max only if the compare operands and the move
   arms are the same two values, in either order.  */
15748 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15750 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15755 mode = GET_MODE (dest);
15757 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15758 but MODE may be a vector mode and thus not appropriate. */
15759 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the operands in an UNSPEC min/max so the
   operand order (and thus NaN/signed-zero behavior) is preserved.  */
15761 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15764 if_true = force_reg (mode, if_true);
15765 v = gen_rtvec (2, if_true, if_false);
15766 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX rtx is sufficient.  */
15770 code = is_min ? SMIN : SMAX;
15771 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15774 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15778 /* Expand an sse vector comparison. Return the register with the result. */
15781 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15782 rtx op_true, rtx op_false)
15784 enum machine_mode mode = GET_MODE (dest);
/* First operand must be a register; second may stay in memory.  */
15787 cmp_op0 = force_reg (mode, cmp_op0);
15788 if (!nonimmediate_operand (cmp_op1, mode))
15789 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps either move arm, so the
   mask computation does not clobber data still needed.  (The first
   leg of this condition is elided in this listing.)  */
15792 || reg_overlap_mentioned_p (dest, op_true)
15793 || reg_overlap_mentioned_p (dest, op_false))
15794 dest = gen_reg_rtx (mode);
15796 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15797 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15802 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15803 operations. This is used for both scalar and vector conditional moves. */
15806 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15808 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -> single AND with the mask.  */
15811 if (op_false == CONST0_RTX (mode))
15813 op_true = force_reg (mode, op_true);
15814 x = gen_rtx_AND (mode, cmp, op_true);
15815 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -> ANDNOT with the mask.  */
15817 else if (op_true == CONST0_RTX (mode))
15819 op_false = force_reg (mode, op_false);
15820 x = gen_rtx_NOT (mode, cmp);
15821 x = gen_rtx_AND (mode, x, op_false);
15822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) pattern.  */
15824 else if (TARGET_SSE5)
15826 rtx pcmov = gen_rtx_SET (mode, dest,
15827 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: (op_true & cmp) | (op_false & ~cmp).  */
15834 op_true = force_reg (mode, op_true);
15835 op_false = force_reg (mode, op_false);
15837 t2 = gen_reg_rtx (mode);
15839 t3 = gen_reg_rtx (mode);
15843 x = gen_rtx_AND (mode, op_true, cmp);
15844 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15846 x = gen_rtx_NOT (mode, cmp);
15847 x = gen_rtx_AND (mode, x, op_false);
15848 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15850 x = gen_rtx_IOR (mode, t3, t2);
15851 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15855 /* Expand a floating-point conditional move. Return true if successful. */
15858 ix86_expand_fp_movcc (rtx operands[])
15860 enum machine_mode mode = GET_MODE (operands[0]);
15861 enum rtx_code code = GET_CODE (operands[1]);
15862 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar-math path: build the result with compare-mask logic
   instead of x87 fcmov.  */
15864 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15866 enum machine_mode cmode;
15868 /* Since we've no cmove for sse registers, don't force bad register
15869 allocation just to gain access to it. Deny movcc when the
15870 comparison mode doesn't match the move mode. */
15871 cmode = GET_MODE (ix86_compare_op0);
15872 if (cmode == VOIDmode)
15873 cmode = GET_MODE (ix86_compare_op1);
15877 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15879 &ix86_compare_op1);
15880 if (code == UNKNOWN)
/* Try the dedicated min/max expansion first; fall back to
   compare + mask/blend.  */
15883 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15884 ix86_compare_op1, operands[2],
15888 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15889 ix86_compare_op1, operands[2], operands[3]);
15890 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15894 /* The floating point conditional move instructions don't directly
15895 support conditions resulting from a signed integer comparison. */
15897 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15899 /* The floating point conditional move instructions don't directly
15900 support signed integer comparisons. */
15902 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Materialize the condition via setcc into a QImode temp and
   re-compare that against zero, which fcmov can handle.  */
15904 gcc_assert (!second_test && !bypass_test);
15905 tmp = gen_reg_rtx (QImode);
15906 ix86_expand_setcc (code, tmp);
15908 ix86_compare_op0 = tmp;
15909 ix86_compare_op1 = const0_rtx;
15910 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before clobbering it.  */
15912 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15914 tmp = gen_reg_rtx (mode);
15915 emit_move_insn (tmp, operands[3]);
15918 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15920 tmp = gen_reg_rtx (mode);
15921 emit_move_insn (tmp, operands[2]);
15925 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15926 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15927 operands[2], operands[3])));
/* Extra fcmovs for compares that need two flag tests.  */
15929 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15930 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15931 operands[3], operands[0])));
15933 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15934 gen_rtx_IF_THEN_ELSE (mode, second_test,
15935 operands[2], operands[0])));
15940 /* Expand a floating-point vector conditional move; a vcond operation
15941 rather than a movcc operation. */
15944 ix86_expand_fp_vcond (rtx operands[])
15946 enum rtx_code code = GET_CODE (operands[3]);
/* Canonicalize the compare operands for an SSE mask-producing compare.  */
15949 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15950 &operands[4], &operands[5]);
15951 if (code == UNKNOWN)
/* Prefer a direct min/max when the vcond matches one exactly.  */
15954 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15955 operands[5], operands[1], operands[2]))
/* Otherwise: compare to a mask, then blend the two arms.  */
15958 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15959 operands[1], operands[2]);
15960 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15964 /* Expand a signed/unsigned integral vector conditional move. */
15967 ix86_expand_int_vcond (rtx operands[])
15969 enum machine_mode mode = GET_MODE (operands[0]);
15970 enum rtx_code code = GET_CODE (operands[3]);
15971 bool negate = false;
15974 cop0 = operands[4];
15975 cop1 = operands[5];
15977 /* SSE5 supports all of the comparisons on all vector int types. */
15980 /* Canonicalize the comparison to EQ, GT, GTU. */
/* (Elided case labels.)  LE-style codes are reversed, GE-style codes
   are swapped; both may set NEGATE so the arms are exchanged below.  */
15991 code = reverse_condition (code);
15997 code = reverse_condition (code);
16003 code = swap_condition (code);
16004 x = cop0, cop0 = cop1, cop1 = x;
16008 gcc_unreachable ();
16011 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16012 if (mode == V2DImode)
16017 /* SSE4.1 supports EQ. */
16018 if (!TARGET_SSE4_1)
16024 /* SSE4.2 supports GT/GTU. */
16025 if (!TARGET_SSE4_2)
16030 gcc_unreachable ();
16034 /* Unsigned parallel compare is not supported by the hardware. Play some
16035 tricks to turn this into a signed comparison against 0. */
16038 cop0 = force_reg (mode, cop0);
16047 /* Perform a parallel modulo subtraction. */
16048 t1 = gen_reg_rtx (mode);
16049 emit_insn ((mode == V4SImode
16051 : gen_subv2di3) (t1, cop0, cop1));
16053 /* Extract the original sign bit of op0. */
16054 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16056 t2 = gen_reg_rtx (mode);
16057 emit_insn ((mode == V4SImode
16059 : gen_andv2di3) (t2, cop0, mask));
16061 /* XOR it back into the result of the subtraction. This results
16062 in the sign bit set iff we saw unsigned underflow. */
16063 x = gen_reg_rtx (mode);
16064 emit_insn ((mode == V4SImode
16066 : gen_xorv2di3) (x, t1, t2));
16074 /* Perform a parallel unsigned saturating subtraction. */
16075 x = gen_reg_rtx (mode);
16076 emit_insn (gen_rtx_SET (VOIDmode, x,
16077 gen_rtx_US_MINUS (mode, cop0, cop1)));
16084 gcc_unreachable ();
/* After the unsigned trickery, compare the adjusted value against 0.  */
16088 cop1 = CONST0_RTX (mode);
/* NEGATE indexes swap operands[1]/operands[2] when the condition was
   reversed during canonicalization above.  */
16092 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16093 operands[1+negate], operands[2-negate]);
16095 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16096 operands[2-negate]);
16100 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16101 true if we should do zero extension, else sign extension. HIGH_P is
16102 true if we want the N/2 high elements, else the low elements. */
16105 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16107 enum machine_mode imode = GET_MODE (operands[1]);
16108 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave pattern by source element width and half
   (case labels elided in this listing).  */
16115 unpack = gen_vec_interleave_highv16qi;
16117 unpack = gen_vec_interleave_lowv16qi;
16121 unpack = gen_vec_interleave_highv8hi;
16123 unpack = gen_vec_interleave_lowv8hi;
16127 unpack = gen_vec_interleave_highv4si;
16129 unpack = gen_vec_interleave_lowv4si;
16132 gcc_unreachable ();
16135 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend interleaves with 0; sign-extend interleaves with a
   mask of the source's sign bits ((0 > x) per-element compare).  */
16138 se = force_reg (imode, CONST0_RTX (imode));
16140 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16141 operands[1], pc_rtx, pc_rtx);
16143 emit_insn (unpack (dest, operands[1], se));
16146 /* This function performs the same task as ix86_expand_sse_unpack,
16147 but with SSE4.1 instructions. */
16150 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16152 enum machine_mode imode = GET_MODE (operands[1]);
16153 rtx (*unpack)(rtx, rtx);
/* Pick pmovzx/pmovsx variant by element width (case labels elided).  */
16160 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16162 unpack = gen_sse4_1_extendv8qiv8hi2;
16166 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16168 unpack = gen_sse4_1_extendv4hiv4si2;
16172 unpack = gen_sse4_1_zero_extendv2siv2di2;
16174 unpack = gen_sse4_1_extendv2siv2di2;
16177 gcc_unreachable ();
16180 dest = operands[0];
/* pmov*x extends the LOW half only, so for HIGH_P first shift the
   upper 8 bytes down.  */
16183 /* Shift higher 8 bytes to lower 8 bytes. */
16184 src = gen_reg_rtx (imode);
16185 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16186 gen_lowpart (TImode, operands[1]),
16192 emit_insn (unpack (dest, src));
16195 /* This function performs the same task as ix86_expand_sse_unpack,
16196 but with sse5 instructions. */
16199 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16201 enum machine_mode imode = GET_MODE (operands[1]);
16202 int pperm_bytes[16];
/* H selects the high (8) or low (0) half of the source bytes.  */
16204 int h = (high_p) ? 8 : 0;
16207 rtvec v = rtvec_alloc (16);
16210 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each output pair is source byte + zero/sign byte.  */
16215 vs = rtvec_alloc (8);
16216 h2 = (high_p) ? 8 : 0;
16217 for (i = 0; i < 8; i++)
16219 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16220 pperm_bytes[2*i+1] = ((unsigned_p)
16222 : PPERM_SIGN | PPERM_SRC2 | i | h);
16225 for (i = 0; i < 16; i++)
16226 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16228 for (i = 0; i < 8; i++)
16229 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16231 p = gen_rtx_PARALLEL (VOIDmode, vs);
16232 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16234 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16236 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes then two extension bytes per word.  */
16240 vs = rtvec_alloc (4);
16241 h2 = (high_p) ? 4 : 0;
16242 for (i = 0; i < 4; i++)
16244 sign_extend = ((unsigned_p)
16246 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16247 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16248 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16249 pperm_bytes[4*i+2] = sign_extend;
16250 pperm_bytes[4*i+3] = sign_extend;
16253 for (i = 0; i < 16; i++)
16254 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16256 for (i = 0; i < 4; i++)
16257 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16259 p = gen_rtx_PARALLEL (VOIDmode, vs);
16260 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16262 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16264 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes then four extension bytes per dword.  */
16268 vs = rtvec_alloc (2);
16269 h2 = (high_p) ? 2 : 0;
16270 for (i = 0; i < 2; i++)
16272 sign_extend = ((unsigned_p)
16274 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16275 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16276 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16277 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16278 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16279 pperm_bytes[8*i+4] = sign_extend;
16280 pperm_bytes[8*i+5] = sign_extend;
16281 pperm_bytes[8*i+6] = sign_extend;
16282 pperm_bytes[8*i+7] = sign_extend;
16285 for (i = 0; i < 16; i++)
16286 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16288 for (i = 0; i < 2; i++)
16289 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16291 p = gen_rtx_PARALLEL (VOIDmode, vs);
16292 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16294 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16296 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16300 gcc_unreachable ();
16306 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16307 next narrower integer vector type */
16309 ix86_expand_sse5_pack (rtx operands[3])
16311 enum machine_mode imode = GET_MODE (operands[0]);
16312 int pperm_bytes[16];
16314 rtvec v = rtvec_alloc (16);
16316 rtx op0 = operands[0];
16317 rtx op1 = operands[1];
16318 rtx op2 = operands[2];
/* V16QI result: take the even bytes of each V8HI source.  */
16323 for (i = 0; i < 8; i++)
16325 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16326 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16329 for (i = 0; i < 16; i++)
16330 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16332 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16333 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: take the low two bytes of each V4SI element.  */
16337 for (i = 0; i < 4; i++)
16339 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16340 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16341 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16342 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16345 for (i = 0; i < 16; i++)
16346 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16348 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16349 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: take the low four bytes of each V2DI element.  */
16353 for (i = 0; i < 2; i++)
16355 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16356 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16357 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16358 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16359 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16360 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16361 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16362 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16365 for (i = 0; i < 16; i++)
16366 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16368 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16369 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16373 gcc_unreachable ();
16379 /* Expand conditional increment or decrement using adb/sbb instructions.
16380 The default case using setcc followed by the conditional move can be
16381 done by generic code. */
16383 ix86_expand_int_addcc (rtx operands[])
16385 enum rtx_code code = GET_CODE (operands[1]);
16387 rtx val = const0_rtx;
16388 bool fpcmp = false;
16389 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 increments can ride on the carry flag; anything else FAILs.  */
16391 if (operands[3] != const1_rtx
16392 && operands[3] != constm1_rtx)
16394 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16395 ix86_compare_op1, &compare_op))
16397 code = GET_CODE (compare_op);
16399 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16400 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16403 code = ix86_fp_compare_code_to_integer (code);
/* Normalize to LTU/GEU; FP compares use the unordered-aware reversal.  */
16410 PUT_CODE (compare_op,
16411 reverse_condition_maybe_unordered
16412 (GET_CODE (compare_op)));
16414 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16416 PUT_MODE (compare_op, mode);
16418 /* Construct either adc or sbb insn. */
16419 if ((code == LTU) == (operands[3] == constm1_rtx))
16421 switch (GET_MODE (operands[0]))
16424 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16427 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16430 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16433 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16436 gcc_unreachable ();
16441 switch (GET_MODE (operands[0]))
16444 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16447 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16450 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16453 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16456 gcc_unreachable ();
16459 return 1; /* DONE */
16463 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16464 works for floating pointer parameters and nonoffsetable memories.
16465 For pushes, it returns just stack offsets; the values will be saved
16466 in the right order. Maximally three parts are generated. */
16469 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target counts SImode words (XFmode = 3);
   64-bit target counts DImode words (the second branch below).  */
16474 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16476 size = (GET_MODE_SIZE (mode) + 4) / 8;
16478 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16479 gcc_assert (size >= 2 && size <= 4);
16481 /* Optimize constant pool reference to immediates. This is used by fp
16482 moves, that force all constants to memory to allow combining. */
16483 if (MEM_P (operand) && MEM_READONLY_P (operand))
16485 rtx tmp = maybe_get_pool_constant (operand);
16490 if (MEM_P (operand) && !offsettable_memref_p (operand))
16492 /* The only non-offsetable memories we handle are pushes. */
16493 int ok = push_operand (operand, VOIDmode);
/* For a push, return the same (Pmode) operand for every part.  */
16497 operand = copy_rtx (operand);
16498 PUT_MODE (operand, Pmode);
16499 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16503 if (GET_CODE (operand) == CONST_VECTOR)
16505 enum machine_mode imode = int_mode_for_mode (mode);
16506 /* Caution: if we looked through a constant pool memory above,
16507 the operand may actually have a different mode now. That's
16508 ok, since we want to pun this all the way back to an integer. */
16509 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16510 gcc_assert (operand != NULL);
/* 32-bit target path (the !TARGET_64BIT branch; the guard is elided
   in this listing).  */
16516 if (mode == DImode)
16517 split_di (&operand, 1, &parts[0], &parts[1]);
16522 if (REG_P (operand))
16524 gcc_assert (reload_completed);
16525 for (i = 0; i < size; i++)
16526 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16528 else if (offsettable_memref_p (operand))
16530 operand = adjust_address (operand, SImode, 0);
16531 parts[0] = operand;
16532 for (i = 1; i < size; i++)
16533 parts[i] = adjust_address (operand, SImode, 4 * i);
16535 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an FP constant into its target-format SImode words.  */
16540 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16544 real_to_target (l, &r, mode);
16545 parts[3] = gen_int_mode (l[3], SImode);
16546 parts[2] = gen_int_mode (l[2], SImode);
16549 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16550 parts[2] = gen_int_mode (l[2], SImode);
16553 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16556 gcc_unreachable ();
16558 parts[1] = gen_int_mode (l[1], SImode);
16559 parts[0] = gen_int_mode (l[0], SImode);
16562 gcc_unreachable ();
/* 64-bit target path: split into DImode (plus SImode/DImode upper)
   parts.  */
16567 if (mode == TImode)
16568 split_ti (&operand, 1, &parts[0], &parts[1]);
16569 if (mode == XFmode || mode == TFmode)
16571 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16572 if (REG_P (operand))
16574 gcc_assert (reload_completed);
16575 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16576 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16578 else if (offsettable_memref_p (operand))
16580 operand = adjust_address (operand, DImode, 0);
16581 parts[0] = operand;
16582 parts[1] = adjust_address (operand, upper_mode, 8);
16584 else if (GET_CODE (operand) == CONST_DOUBLE)
16589 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16590 real_to_target (l, &r, mode);
16592 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16593 if (HOST_BITS_PER_WIDE_INT >= 64)
16596 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16597 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16600 parts[0] = immed_double_const (l[0], l[1], DImode);
16602 if (upper_mode == SImode)
16603 parts[1] = gen_int_mode (l[2], SImode);
16604 else if (HOST_BITS_PER_WIDE_INT >= 64)
16607 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16608 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16611 parts[1] = immed_double_const (l[2], l[3], DImode);
16614 gcc_unreachable ();
16621 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16622 Return false when normal moves are needed; true when all required
16623 insns have been emitted. Operands 2-4 contain the input values
16624 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this listing is elided (the original line numbers skip
   ranges), so braces, else-arms and some statements are not visible.
   Comments below describe only what the visible lines establish. */
16627 ix86_split_long_move (rtx operands[])
16632 int collisions = 0;
16633 enum machine_mode mode = GET_MODE (operands[0]);
/* One flag per word-sized part; at most 4 parts (e.g. TImode/TFmode). */
16634 bool collisionparts[4];
16636 /* The DFmode expanders may ask us to move double.
16637 For 64bit target this is single move. By hiding the fact
16638 here we simplify i386.md splitters. */
16639 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16641 /* Optimize constant pool reference to immediates. This is used by
16642 fp moves, that force all constants to memory to allow combining. */
16644 if (MEM_P (operands[1])
16645 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16646 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16647 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16648 if (push_operand (operands[0], VOIDmode))
16650 operands[0] = copy_rtx (operands[0]);
16651 PUT_MODE (operands[0], Pmode);
16654 operands[0] = gen_lowpart (DImode, operands[0]);
16655 operands[1] = gen_lowpart (DImode, operands[1]);
16656 emit_move_insn (operands[0], operands[1]);
16660 /* The only non-offsettable memory we handle is push. */
16661 if (push_operand (operands[0], VOIDmode))
16664 gcc_assert (!MEM_P (operands[0])
16665 || offsettable_memref_p (operands[0]));
/* Break source and destination into word-sized parts; NPARTS is the
   number of parts (2, 3 or 4 depending on the mode). */
16667 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16668 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16670 /* When emitting push, take care for source operands on the stack. */
16671 if (push && MEM_P (operands[1])
16672 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16673 for (i = 0; i < nparts - 1; i++)
16674 part[1][i] = change_address (part[1][i],
16675 GET_MODE (part[1][i]),
16676 XEXP (part[1][i + 1], 0));
16678 /* We need to do copy in the right order in case an address register
16679 of the source overlaps the destination. */
16680 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts clobber a register used in the
   source address; COLLISIONS counts them (increment is elided here). */
16684 for (i = 0; i < nparts; i++)
16687 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16688 if (collisionparts[i])
16692 /* Collision in the middle part can be handled by reordering. */
16693 if (collisions == 1 && nparts == 3 && collisionparts [1])
16695 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16696 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16698 else if (collisions == 1
16700 && (collisionparts [1] || collisionparts [2]))
16702 if (collisionparts [1])
16704 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16705 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16709 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16710 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16714 /* If there are more collisions, we can't handle it by reordering.
16715 Do an lea to the last part and use only one colliding move. */
16716 else if (collisions > 1)
16722 base = part[0][nparts - 1];
16724 /* Handle the case when the last part isn't valid for lea.
16725 Happens in 64-bit mode storing the 12-byte XFmode. */
16726 if (GET_MODE (base) != Pmode)
16727 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE, then rewrite every source part
   to address BASE + i * UNITS_PER_WORD. */
16729 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16730 part[1][0] = replace_equiv_address (part[1][0], base);
16731 for (i = 1; i < nparts; i++)
16733 tmp = plus_constant (base, UNITS_PER_WORD * i);
16734 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: XFmode pushes pre-adjust the stack by 4 bytes so the
   12-byte value occupies a 16-byte slot. */
16745 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16746 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16747 emit_move_insn (part[0][2], part[1][2]);
16749 else if (nparts == 4)
16751 emit_move_insn (part[0][3], part[1][3]);
16752 emit_move_insn (part[0][2], part[1][2]);
16757 /* In 64bit mode we don't have 32bit push available. In case this is
16758 register, it is OK - we will just use larger counterpart. We also
16759 retype memory - these come from attempt to avoid REX prefix on
16760 moving of second half of TFmode value. */
16761 if (GET_MODE (part[1][1]) == SImode)
16763 switch (GET_CODE (part[1][1]))
16766 part[1][1] = adjust_address (part[1][1], DImode, 0);
16770 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16774 gcc_unreachable ();
16777 if (GET_MODE (part[1][0]) == SImode)
16778 part[1][0] = part[1][1];
16781 emit_move_insn (part[0][1], part[1][1]);
16782 emit_move_insn (part[0][0], part[1][0]);
16786 /* Choose correct order to not overwrite the source before it is copied. */
16787 if ((REG_P (part[0][0])
16788 && REG_P (part[1][1])
16789 && (REGNO (part[0][0]) == REGNO (part[1][1])
16791 && REGNO (part[0][0]) == REGNO (part[1][2]))
16793 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16795 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Emit highest part first (reverse order) when the first destination
   register overlaps a later source part. */
16797 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16799 operands[2 + i] = part[0][j];
16800 operands[6 + i] = part[1][j];
16805 for (i = 0; i < nparts; i++)
16807 operands[2 + i] = part[0][i];
16808 operands[6 + i] = part[1][i];
16812 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16813 if (optimize_insn_for_size_p ())
16815 for (j = 0; j < nparts - 1; j++)
16816 if (CONST_INT_P (operands[6 + j])
16817 && operands[6 + j] != const0_rtx
16818 && REG_P (operands[2 + j]))
16819 for (i = j; i < nparts - 1; i++)
16820 if (CONST_INT_P (operands[7 + i])
16821 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
/* Reuse the register already holding this constant instead of
   materializing the same immediate again. */
16822 operands[7 + i] = operands[2 + j];
16825 for (i = 0; i < nparts; i++)
16826 emit_move_insn (operands[2 + i], operands[6 + i]);
16831 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16832 left shift by a constant, either using a single shift or
16833 a sequence of add instructions. */
/* NOTE(review): the '?' arms of the mode-selecting conditionals are
   elided in this listing; only the ':' alternatives are visible. */
16836 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* COUNT == 1 case (condition elided): a single add doubles OPERAND. */
16840 emit_insn ((mode == DImode
16842 : gen_adddi3) (operand, operand, operand));
/* Use a sequence of adds when optimizing for speed and COUNT adds are
   no more expensive than one constant shift per the cost tables. */
16844 else if (!optimize_insn_for_size_p ()
16845 && count * ix86_cost->add <= ix86_cost->shift_const)
16848 for (i=0; i<count; i++)
16850 emit_insn ((mode == DImode
16852 : gen_adddi3) (operand, operand, operand));
/* Fallback: one shift-left by COUNT. */
16856 emit_insn ((mode == DImode
16858 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into operations on the low/high halves.  OPERANDS[0..2] are dest,
   source and shift count; SCRATCH is an optional scratch register used
   on TARGET_CMOVE for the variable-count adjustment.
   NOTE(review): listing is elided; some branch arms are not visible. */
16862 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16864 rtx low[2], high[2];
/* Width of one half: 32 for DImode pairs, 64 for TImode pairs. */
16866 const int single_width = mode == DImode ? 32 : 64;
16868 if (CONST_INT_P (operands[2]))
16870 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
/* Reduce the count modulo twice the half width (hardware semantics). */
16871 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16873 if (count >= single_width)
/* Shifting by >= half width: high half gets (low << (count - width)),
   low half becomes zero. */
16875 emit_move_insn (high[0], low[1]);
16876 emit_move_insn (low[0], const0_rtx);
16878 if (count > single_width)
16879 ix86_expand_ashl_const (high[0], count - single_width, mode);
16883 if (!rtx_equal_p (operands[0], operands[1]))
16884 emit_move_insn (operands[0], operands[1]);
/* Small constant count: shld feeds low bits into high, then shift low. */
16885 emit_insn ((mode == DImode
16887 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16888 ix86_expand_ashl_const (low[0], count, mode);
16893 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16895 if (operands[1] == const1_rtx)
16897 /* Assuming we've chosen a QImode capable registers, then 1 << N
16898 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16899 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16901 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16903 ix86_expand_clear (low[0]);
16904 ix86_expand_clear (high[0]);
/* Test the "count >= single_width" bit of the shift count. */
16905 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc into the low byte of each half: low = (bit clear), high = (bit set). */
16907 d = gen_lowpart (QImode, low[0]);
16908 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16909 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16910 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16912 d = gen_lowpart (QImode, high[0]);
16913 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16914 s = gen_rtx_NE (QImode, flags, const0_rtx);
16915 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16918 /* Otherwise, we can get the same results by manually performing
16919 a bit extract operation on bit 5/6, and then performing the two
16920 shifts. The two methods of getting 0/1 into low/high are exactly
16921 the same size. Avoiding the shift in the bit extract case helps
16922 pentium4 a bit; no one else seems to care much either way. */
16927 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16928 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16930 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16931 emit_insn (gen_rtx_SET (VOIDmode, high[0], x))mode;
16933 emit_insn ((mode == DImode
16935 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16936 emit_insn ((mode == DImode
16938 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16939 emit_move_insn (low[0], high[0]);
16940 emit_insn ((mode == DImode
16942 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 seeds left by the (masked) variable count. */
16945 emit_insn ((mode == DImode
16947 : gen_ashldi3) (low[0], low[0], operands[2]));
16948 emit_insn ((mode == DImode
16950 : gen_ashldi3) (high[0], high[0], operands[2]));
16954 if (operands[1] == constm1_rtx)
16956 /* For -1 << N, we can avoid the shld instruction, because we
16957 know that we're shifting 0...31/63 ones into a -1. */
16958 emit_move_insn (low[0], constm1_rtx)mode;
16959 if (optimize_insn_for_size_p ())
16960 emit_move_insn (high[0], low[0]);
16962 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case. */
16966 if (!rtx_equal_p (operands[0], operands[1]))
16967 emit_move_insn (operands[0], operands[1]);
16969 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16970 emit_insn ((mode == DImode
16972 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16975 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the case count >= single_width: with cmov use the scratch
   register, otherwise emit the branching adjustment pattern. */
16977 if (TARGET_CMOVE && scratch)
16979 ix86_expand_clear (scratch);
16980 emit_insn ((mode == DImode
16981 ? gen_x86_shift_adj_1
16982 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16986 emit_insn ((mode == DImode
16987 ? gen_x86_shift_adj_2
16988 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-word operations.
   Mirrors ix86_split_ashl; sign bits are propagated from the high half.
   NOTE(review): listing is elided; some branch arms are not visible. */
16992 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16994 rtx low[2], high[2];
16996 const int single_width = mode == DImode ? 32 : 64;
16998 if (CONST_INT_P (operands[2]))
17000 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17001 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both halves become the sign extension of the source. */
17003 if (count == single_width * 2 - 1)
17005 emit_move_insn (high[0], high[1]);
17006 emit_insn ((mode == DImode
17008 : gen_ashrdi3) (high[0], high[0],
17009 GEN_INT (single_width - 1)));
17010 emit_move_insn (low[0], high[0]);
/* Count >= half width: low gets shifted high half, high gets sign fill. */
17013 else if (count >= single_width)
17015 emit_move_insn (low[0], high[1]);
17016 emit_move_insn (high[0], low[0]);
17017 emit_insn ((mode == DImode
17019 : gen_ashrdi3) (high[0], high[0],
17020 GEN_INT (single_width - 1)));
17021 if (count > single_width)
17022 emit_insn ((mode == DImode
17024 : gen_ashrdi3) (low[0], low[0],
17025 GEN_INT (count - single_width)));
/* Small constant count: shrd moves high bits into low, then sar high. */
17029 if (!rtx_equal_p (operands[0], operands[1]))
17030 emit_move_insn (operands[0], operands[1]);
17031 emit_insn ((mode == DImode
17033 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17034 emit_insn ((mode == DImode
17036 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count. */
17041 if (!rtx_equal_p (operands[0], operands[1]))
17042 emit_move_insn (operands[0], operands[1]);
17044 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17046 emit_insn ((mode == DImode
17048 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17049 emit_insn ((mode == DImode
17051 : gen_ashrdi3) (high[0], high[0], operands[2]));
17053 if (TARGET_CMOVE && scratch)
/* Scratch holds the sign-fill value used when count >= single_width. */
17055 emit_move_insn (scratch, high[0]);
17056 emit_insn ((mode == DImode
17058 : gen_ashrdi3) (scratch, scratch,
17059 GEN_INT (single_width - 1)));
17060 emit_insn ((mode == DImode
17061 ? gen_x86_shift_adj_1
17062 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17066 emit_insn ((mode == DImode
17067 ? gen_x86_shift_adj_3
17068 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-word operations.
   Like ix86_split_ashr but the vacated high bits are zero-filled.
   NOTE(review): listing is elided; some branch arms are not visible. */
17073 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17075 rtx low[2], high[2];
17077 const int single_width = mode == DImode ? 32 : 64;
17079 if (CONST_INT_P (operands[2]))
17081 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17082 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= half width: low gets shifted high half, high becomes zero. */
17084 if (count >= single_width)
17086 emit_move_insn (low[0], high[1]);
17087 ix86_expand_clear (high[0]);
17089 if (count > single_width)
17090 emit_insn ((mode == DImode
17092 : gen_lshrdi3) (low[0], low[0],
17093 GEN_INT (count - single_width)));
/* Small constant count: shrd feeds high bits into low, then shr high. */
17097 if (!rtx_equal_p (operands[0], operands[1]))
17098 emit_move_insn (operands[0], operands[1]);
17099 emit_insn ((mode == DImode
17101 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17102 emit_insn ((mode == DImode
17104 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count. */
17109 if (!rtx_equal_p (operands[0], operands[1]))
17110 emit_move_insn (operands[0], operands[1]);
17112 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17114 emit_insn ((mode == DImode
17116 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17117 emit_insn ((mode == DImode
17119 : gen_lshrdi3) (high[0], high[0], operands[2]));
17121 /* Heh. By reversing the arguments, we can reuse this pattern. */
17122 if (TARGET_CMOVE && scratch)
17124 ix86_expand_clear (scratch);
17125 emit_insn ((mode == DImode
17126 ? gen_x86_shift_adj_1
17127 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17131 emit_insn ((mode == DImode
17132 ? gen_x86_shift_adj_2
17133 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17137 /* Predict just emitted jump instruction to be taken with probability PROB.
   PROB is expressed on the REG_BR_PROB_BASE scale; the note is attached
   to the last emitted insn, which must be a jump. */
17139 predict_jump (int prob)
17141 rtx insn = get_last_insn ();
17142 gcc_assert (JUMP_P (insn));
17143 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17146 /* Helper function for the string operations below. Test VARIABLE for
17147 alignment: emit (VARIABLE & VALUE) and a conditional jump, taken when
   the tested bits are zero, to a fresh label which is presumably
   returned to the caller (return statement elided in this listing).
   EPILOGUE selects the branch-probability hint. */
17149 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17151 rtx label = gen_label_rtx ();
17152 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17153 if (GET_MODE (variable) == DImode)
17154 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)))mode;
17156 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17157 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are 50/50; prologue alignment tests usually pass. */
17160 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17162 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17166 /* Decrement COUNTREG by VALUE (emits an add of -VALUE in the
   register's own mode). */
17168 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17170 if (GET_MODE (countreg) == DImode)
17171 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17173 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17176 /* Zero extend possibly SImode EXP to Pmode register.
   VOIDmode (a constant) is simply forced into a Pmode register;
   an already-Pmode value is copied to a fresh register. */
17178 ix86_zero_extend_to_Pmode (rtx exp)
17181 if (GET_MODE (exp) == VOIDmode)
17182 return force_reg (Pmode, exp);
17183 if (GET_MODE (exp) == Pmode)
17184 return copy_to_mode_reg (Pmode, exp);
/* Remaining case: a narrower (SImode) value zero-extended to DImode. */
17185 r = gen_reg_rtx (Pmode);
17186 emit_insn (gen_zero_extendsidi2 (r, exp));
17190 /* Divide COUNTREG by SCALE.  SCALE must be a power of two: the
   non-constant path shifts right by exact_log2 (SCALE). */
17192 scale_counter (rtx countreg, int scale)
17195 rtx piece_size_mask;
/* Constant counts are folded at expand time. */
17199 if (CONST_INT_P (countreg))
17200 return GEN_INT (INTVAL (countreg) / scale);
17201 gcc_assert (REG_P (countreg));
17203 piece_size_mask = GEN_INT (scale - 1);
17204 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17205 GEN_INT (exact_log2 (scale)),
17206 NULL, 1, OPTAB_DIRECT);
17210 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17211 DImode for constant loop counts (an elided branch returns the mode;
   DImode is used only when a 64-bit constant exceeds 32 bits). */
17213 static enum machine_mode
17214 counter_mode (rtx count_exp)
17216 if (GET_MODE (count_exp) != VOIDmode)
17217 return GET_MODE (count_exp);
17218 if (GET_CODE (count_exp) != CONST_INT)
17220 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17225 /* When SRCPTR is non-NULL, output simple loop to move memory
17226 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17227 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17228 equivalent loop to set memory by VALUE (supposed to be in MODE).
17230 The size is rounded down to whole number of chunk size moved at once.
17231 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.
   NOTE(review): this listing is elided; loop-body braces and a few
   statements are not visible. */
17235 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17236 rtx destptr, rtx srcptr, rtx value,
17237 rtx count, enum machine_mode mode, int unroll,
17240 rtx out_label, top_label, iter, tmp;
17241 enum machine_mode iter_mode = counter_mode (count);
/* Bytes handled per loop iteration and the mask rounding COUNT down
   to a multiple of it. */
17242 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17243 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17249 top_label = gen_label_rtx ();
17250 out_label = gen_label_rtx ();
17251 iter = gen_reg_rtx (iter_mode);
17253 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17254 NULL, 1, OPTAB_DIRECT);
17255 /* Those two should combine. */
17256 if (piece_size == const1_rtx)
/* Skip the loop entirely when the rounded size is zero. */
17258 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17260 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17262 emit_move_insn (iter, const0_rtx);
17264 emit_label (top_label);
17266 tmp = convert_modes (Pmode, iter_mode, iter, true);
/* Address each chunk as base pointer + loop counter. */
17267 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17268 destmem = change_address (destmem, mode, x_addr);
17272 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17273 srcmem = change_address (srcmem, mode, y_addr);
17275 /* When unrolling for chips that reorder memory reads and writes,
17276 we can save registers by using single temporary.
17277 Also using 4 temporaries is overkill in 32bit mode. */
/* This "&& 0" branch is intentionally disabled; the all-loads-then-
   all-stores variant below is used instead. */
17278 if (!TARGET_64BIT && 0)
17280 for (i = 0; i < unroll; i++)
17285 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17287 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17289 emit_move_insn (destmem, srcmem);
17295 gcc_assert (unroll <= 4);
/* Load all UNROLL source chunks into temporaries first... */
17296 for (i = 0; i < unroll; i++)
17298 tmpreg[i] = gen_reg_rtx (mode);
17302 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17304 emit_move_insn (tmpreg[i], srcmem);
/* ...then store them, so loads and stores do not interleave. */
17306 for (i = 0; i < unroll; i++)
17311 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17313 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk. */
17318 for (i = 0; i < unroll; i++)
17322 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17323 emit_move_insn (destmem, value);
17326 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17327 true, OPTAB_LIB_WIDEN);
17329 emit_move_insn (iter, tmp);
17331 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Estimate the back-edge probability from the expected iteration count. */
17333 if (expected_size != -1)
17335 expected_size /= GET_MODE_SIZE (mode) * unroll;
17336 if (expected_size == 0)
17338 else if (expected_size > REG_BR_PROB_BASE)
17339 predict_jump (REG_BR_PROB_BASE - 1);
17341 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17344 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the live pointers past the region processed by the loop. */
17345 iter = ix86_zero_extend_to_Pmode (iter);
17346 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17347 true, OPTAB_LIB_WIDEN);
17348 if (tmp != destptr)
17349 emit_move_insn (destptr, tmp);
17352 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17353 true, OPTAB_LIB_WIDEN);
17355 emit_move_insn (srcptr, tmp);
17357 emit_label (out_label);
17360 /* Output "rep; mov" instruction.
17361 Arguments have same meaning as for previous function
   (expand_set_or_movmem_via_loop).  The count register, destination
   and source expressions are built so the pattern can expose the
   final pointer values. */
17363 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17364 rtx destptr, rtx srcptr,
17366 enum machine_mode mode)
17372 /* If the size is known, it is shorter to use rep movs.
   NOTE(review): the body of this special case is elided here. */
17373 if (mode == QImode && CONST_INT_P (count)
17374 && !(INTVAL (count) & 3))
/* Rebuild the MEMs as BLKmode based directly on the pointers. */
17377 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17378 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17379 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17380 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* Counter counts MODE-sized pieces, not bytes. */
17381 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17382 if (mode != QImode)
/* End pointers: base + countreg * piece size. */
17384 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17385 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17386 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17387 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17388 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17389 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17393 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17394 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Keep alias info honest: with a known count record the exact size
   moved (rounded down to whole pieces); otherwise clear any stale size. */
17396 if (CONST_INT_P (count))
17398 count = GEN_INT (INTVAL (count)
17399 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17400 destmem = shallow_copy_rtx (destmem);
17401 srcmem = shallow_copy_rtx (srcmem);
17402 set_mem_size (destmem, count);
17403 set_mem_size (srcmem, count);
17407 if (MEM_SIZE (destmem))
17408 set_mem_size (destmem, NULL_RTX);
17409 if (MEM_SIZE (srcmem))
17410 set_mem_size (srcmem, NULL_RTX);
17412 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17416 /* Output "rep; stos" instruction.
17417 Arguments have same meaning as for previous function
   (expand_movmem_via_rep_mov); VALUE is the fill value forced into
   a MODE register, ORIG_VALUE (elided parameter) lets the known-zero
   fill keep precise mem-size info. */
17419 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17420 rtx count, enum machine_mode mode,
17426 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17427 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17428 value = force_reg (mode, gen_lowpart (mode, value));
/* Counter counts MODE-sized pieces, not bytes. */
17429 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17430 if (mode != QImode)
17432 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17433 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17434 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17437 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zero fill with constant count keeps an exact MEM size. */
17438 if (orig_value == const0_rtx && CONST_INT_P (count))
17440 count = GEN_INT (INTVAL (count)
17441 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17442 destmem = shallow_copy_rtx (destmem);
17443 set_mem_size (destmem, count);
17445 else if (MEM_SIZE (destmem))
17446 set_mem_size (destmem, NULL_RTX);
17447 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move of one MODE-sized piece at byte OFFSET
   from SRCMEM/SRCPTR to DESTMEM/DESTPTR, advancing both pointers
   via the strmov pattern's auto-modify semantics. */
17451 emit_strmov (rtx destmem, rtx srcmem,
17452 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17454 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17455 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17456 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17459 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
   With a constant COUNT the residue is copied by an unrolled sequence
   keyed on its bits (16/8/4/2/1); otherwise alignment tests and
   conditional copies are emitted.
   NOTE(review): listing is elided; some TARGET_64BIT arms are hidden. */
17461 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17462 rtx destptr, rtx srcptr, rtx count, int max_size)
17465 if (CONST_INT_P (count))
17467 HOST_WIDE_INT countval = INTVAL (count);
/* Each bit of the residual count selects one fixed-size copy. */
17470 if ((countval & 0x10) && max_size > 16)
17474 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17475 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17478 gcc_unreachable ();
17481 if ((countval & 0x08) && max_size > 8)
17484 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit targets copy 8 bytes as two SImode moves. */
17487 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17488 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17492 if ((countval & 0x04) && max_size > 4)
17494 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17497 if ((countval & 0x02) && max_size > 2)
17499 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17502 if ((countval & 0x01) && max_size > 1)
17504 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: fall back to a byte loop over
   count & (max_size - 1). */
17511 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17512 count, 1, OPTAB_DIRECT);
17513 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17514 count, QImode, 1, 4);
17518 /* When there are stringops, we can cheaply increase dest and src pointers.
17519 Otherwise we save code size by maintaining offset (zero is readily
17520 available from preceding rep operation) and using x86 addressing modes.
/* Variant using movs, which advances both pointers implicitly. */
17522 if (TARGET_SINGLE_STRINGOP)
17526 rtx label = ix86_expand_aligntest (count, 4, true);
17527 src = change_address (srcmem, SImode, srcptr);
17528 dest = change_address (destmem, SImode, destptr);
17529 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17530 emit_label (label);
17531 LABEL_NUSES (label) = 1;
17535 rtx label = ix86_expand_aligntest (count, 2, true);
17536 src = change_address (srcmem, HImode, srcptr);
17537 dest = change_address (destmem, HImode, destptr);
17538 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17539 emit_label (label);
17540 LABEL_NUSES (label) = 1;
17544 rtx label = ix86_expand_aligntest (count, 1, true);
17545 src = change_address (srcmem, QImode, srcptr);
17546 dest = change_address (destmem, QImode, destptr);
17547 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17548 emit_label (label);
17549 LABEL_NUSES (label) = 1;
/* Variant without stringops: keep a running OFFSET register and use
   base+offset addressing instead of bumping the pointers. */
17554 rtx offset = force_reg (Pmode, const0_rtx);
17559 rtx label = ix86_expand_aligntest (count, 4, true);
17560 src = change_address (srcmem, SImode, srcptr);
17561 dest = change_address (destmem, SImode, destptr);
17562 emit_move_insn (dest, src);
17563 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17564 true, OPTAB_LIB_WIDEN);
17566 emit_move_insn (offset, tmp);
17567 emit_label (label);
17568 LABEL_NUSES (label) = 1;
17572 rtx label = ix86_expand_aligntest (count, 2, true);
17573 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17574 src = change_address (srcmem, HImode, tmp);
17575 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17576 dest = change_address (destmem, HImode, tmp);
17577 emit_move_insn (dest, src);
17578 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17579 true, OPTAB_LIB_WIDEN);
17581 emit_move_insn (offset, tmp);
17582 emit_label (label);
17583 LABEL_NUSES (label) = 1;
17587 rtx label = ix86_expand_aligntest (count, 1, true);
17588 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17589 src = change_address (srcmem, QImode, tmp);
17590 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17591 dest = change_address (destmem, QImode, tmp);
17592 emit_move_insn (dest, src);
17593 emit_label (label);
17594 LABEL_NUSES (label) = 1;
17599 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
   Masks COUNT down to the residue and delegates to the generic
   byte-wise set loop with the low byte of VALUE. */
17601 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17602 rtx count, int max_size)
17605 expand_simple_binop (counter_mode (count), AND, count,
17606 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17607 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17608 gen_lowpart (QImode, value), count, QImode,
17612 /* Output code to store at most count & (max_size - 1) bytes of VALUE
   starting at DEST.  Mirrors expand_movmem_epilogue: constant counts
   get an unrolled bit-keyed store sequence, variable counts get
   alignment-tested conditional stores.
   NOTE(review): listing is elided; some TARGET_64BIT arms are hidden. */
17614 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17618 if (CONST_INT_P (count))
17620 HOST_WIDE_INT countval = INTVAL (count);
/* Each bit of the residual count selects one fixed-size store. */
17623 if ((countval & 0x10) && max_size > 16)
17627 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17628 emit_insn (gen_strset (destptr, dest, value));
17629 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17630 emit_insn (gen_strset (destptr, dest, value));
17633 gcc_unreachable ();
17636 if ((countval & 0x08) && max_size > 8)
17640 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17641 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit targets store 8 bytes as two SImode stores. */
17645 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17646 emit_insn (gen_strset (destptr, dest, value));
17647 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17648 emit_insn (gen_strset (destptr, dest, value));
17652 if ((countval & 0x04) && max_size > 4)
17654 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17655 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17658 if ((countval & 0x02) && max_size > 2)
17660 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17661 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17664 if ((countval & 0x01) && max_size > 1)
17666 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17667 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: fall back to the byte loop. */
17674 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: test each power-of-two residue bit and store
   conditionally, from 16 bytes down to 1. */
17679 rtx label = ix86_expand_aligntest (count, 16, true);
17682 dest = change_address (destmem, DImode, destptr);
17683 emit_insn (gen_strset (destptr, dest, value));
17684 emit_insn (gen_strset (destptr, dest, value));
17688 dest = change_address (destmem, SImode, destptr);
17689 emit_insn (gen_strset (destptr, dest, value));
17690 emit_insn (gen_strset (destptr, dest, value));
17691 emit_insn (gen_strset (destptr, dest, value));
17692 emit_insn (gen_strset (destptr, dest, value));
17694 emit_label (label);
17695 LABEL_NUSES (label) = 1;
17699 rtx label = ix86_expand_aligntest (count, 8, true);
17702 dest = change_address (destmem, DImode, destptr);
17703 emit_insn (gen_strset (destptr, dest, value));
17707 dest = change_address (destmem, SImode, destptr);
17708 emit_insn (gen_strset (destptr, dest, value));
17709 emit_insn (gen_strset (destptr, dest, value));
17711 emit_label (label);
17712 LABEL_NUSES (label) = 1;
17716 rtx label = ix86_expand_aligntest (count, 4, true);
17717 dest = change_address (destmem, SImode, destptr);
17718 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17719 emit_label (label);
17720 LABEL_NUSES (label) = 1;
17724 rtx label = ix86_expand_aligntest (count, 2, true);
17725 dest = change_address (destmem, HImode, destptr);
17726 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17727 emit_label (label);
17728 LABEL_NUSES (label) = 1;
17732 rtx label = ix86_expand_aligntest (count, 1, true);
17733 dest = change_address (destmem, QImode, destptr);
17734 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17735 emit_label (label);
17736 LABEL_NUSES (label) = 1;
17740 /* Copy enough bytes from SRC to DEST to make DEST, known to be aligned
   to ALIGN, aligned to DESIRED_ALIGNMENT.  For each alignment level a
   single conditional copy is emitted (the aligntest jump skips it when
   the destination is already aligned), and COUNT is decremented by the
   bytes copied. */
17743 expand_movmem_prologue (rtx destmem, rtx srcmem,
17744 rtx destptr, rtx srcptr, rtx count,
17745 int align, int desired_alignment)
17747 if (align <= 1 && desired_alignment > 1)
17749 rtx label = ix86_expand_aligntest (destptr, 1, false);
17750 srcmem = change_address (srcmem, QImode, srcptr);
17751 destmem = change_address (destmem, QImode, destptr);
17752 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17753 ix86_adjust_counter (count, 1);
17754 emit_label (label);
17755 LABEL_NUSES (label) = 1;
17757 if (align <= 2 && desired_alignment > 2)
17759 rtx label = ix86_expand_aligntest (destptr, 2, false);
17760 srcmem = change_address (srcmem, HImode, srcptr);
17761 destmem = change_address (destmem, HImode, destptr);
17762 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17763 ix86_adjust_counter (count, 2);
17764 emit_label (label);
17765 LABEL_NUSES (label) = 1;
17767 if (align <= 4 && desired_alignment > 4)
17769 rtx label = ix86_expand_aligntest (destptr, 4, false);
17770 srcmem = change_address (srcmem, SImode, srcptr);
17771 destmem = change_address (destmem, SImode, destptr);
17772 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17773 ix86_adjust_counter (count, 4);
17774 emit_label (label);
17775 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 are never requested by the callers. */
17777 gcc_assert (desired_alignment <= 8);
17780 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17781 ALIGN_BYTES is how many bytes need to be copied. */
/* Constant-count variant of the movmem alignment prologue: ALIGN_BYTES is
   known at compile time, so each 1/2/4-byte copy is emitted unconditionally
   and the MEM alignment/size aliasing info is kept accurate.
   NOTE(review): listing is decimated (function header, braces, `off`
   bookkeeping and return statement are among the missing lines).  */
17783 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17784 int desired_align, int align_bytes)
17787 rtx src_size, dst_size;
/* SRC_ALIGN_BYTES becomes the number of bytes after which SRC reaches
   DESIRED_ALIGN, or stays negative when SRC's alignment is unknown.  */
17789 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17790 if (src_align_bytes >= 0)
17791 src_align_bytes = desired_align - src_align_bytes;
17792 src_size = MEM_SIZE (src);
17793 dst_size = MEM_SIZE (dst);
17794 if (align_bytes & 1)
17796 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17797 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17799 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17801 if (align_bytes & 2)
17803 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17804 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17805 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17806 set_mem_align (dst, 2 * BITS_PER_UNIT);
/* SRC can only be marked 2-byte aligned when its misalignment parity
   matches DST's (both reach the boundary after the same odd byte).  */
17807 if (src_align_bytes >= 0
17808 && (src_align_bytes & 1) == (align_bytes & 1)
17809 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17810 set_mem_align (src, 2 * BITS_PER_UNIT);
17812 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17814 if (align_bytes & 4)
17816 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17817 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17818 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17819 set_mem_align (dst, 4 * BITS_PER_UNIT);
17820 if (src_align_bytes >= 0)
17822 unsigned int src_align = 0;
17823 if ((src_align_bytes & 3) == (align_bytes & 3))
17825 else if ((src_align_bytes & 1) == (align_bytes & 1))
17827 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17828 set_mem_align (src, src_align * BITS_PER_UNIT);
17831 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Rewrite both MEMs as BLKmode at the final offset for the main loop,
   recording the best alignment now provable for each.  */
17833 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17834 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17835 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17836 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17837 if (src_align_bytes >= 0)
17839 unsigned int src_align = 0;
17840 if ((src_align_bytes & 7) == (align_bytes & 7))
17842 else if ((src_align_bytes & 3) == (align_bytes & 3))
17844 else if ((src_align_bytes & 1) == (align_bytes & 1))
17846 if (src_align > (unsigned int) desired_align)
17847 src_align = desired_align;
17848 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17849 set_mem_align (src, src_align * BITS_PER_UNIT);
17852 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* NOTE(review): the line below passes DST together with SRC_SIZE, but DST's
   size was already updated from DST_SIZE just above; presumably this should
   update SRC's size instead -- verify against upstream.  */
17854 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17859 /* Set enough from DEST to align DEST known to be aligned by ALIGN to
17860 DESIRED_ALIGNMENT. */
/* memset counterpart of expand_movmem_prologue: runtime-test each low
   address bit of DESTPTR, store one unit of the promoted VALUE when set,
   and reduce COUNT.  (Listing is decimated; braces/header lines missing.)  */
17862 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17863 int align, int desired_alignment)
17865 if (align <= 1 && desired_alignment > 1)
17867 rtx label = ix86_expand_aligntest (destptr, 1, false);
17868 destmem = change_address (destmem, QImode, destptr);
17869 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17870 ix86_adjust_counter (count, 1);
17871 emit_label (label);
17872 LABEL_NUSES (label) = 1;
17874 if (align <= 2 && desired_alignment > 2)
17876 rtx label = ix86_expand_aligntest (destptr, 2, false);
17877 destmem = change_address (destmem, HImode, destptr);
17878 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17879 ix86_adjust_counter (count, 2);
17880 emit_label (label);
17881 LABEL_NUSES (label) = 1;
17883 if (align <= 4 && desired_alignment > 4)
17885 rtx label = ix86_expand_aligntest (destptr, 4, false);
17886 destmem = change_address (destmem, SImode, destptr);
17887 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17888 ix86_adjust_counter (count, 4);
17889 emit_label (label);
17890 LABEL_NUSES (label) = 1;
/* As with the movmem prologue, only alignments up to 8 are supported.  */
17892 gcc_assert (desired_alignment <= 8);
17895 /* Set enough from DST to align DST known to be aligned by ALIGN to
17896 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Constant-count variant of the setmem alignment prologue: ALIGN_BYTES is a
   compile-time constant, so each 1/2/4-byte store is emitted unconditionally
   while MEM alignment/size info is kept accurate.  (Listing is decimated;
   header, braces, `off` bookkeeping and return are missing.)  */
17898 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17899 int desired_align, int align_bytes)
17902 rtx dst_size = MEM_SIZE (dst);
17903 if (align_bytes & 1)
17905 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17907 emit_insn (gen_strset (destreg, dst,
17908 gen_lowpart (QImode, value)));
17910 if (align_bytes & 2)
17912 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17913 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17914 set_mem_align (dst, 2 * BITS_PER_UNIT)
17916 emit_insn (gen_strset (destreg, dst,
17917 gen_lowpart (HImode, value)));
17919 if (align_bytes & 4)
17921 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17922 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17923 set_mem_align (dst, 4 * BITS_PER_UNIT);
17925 emit_insn (gen_strset (destreg, dst,
17926 gen_lowpart (SImode, value)));
/* Re-form DST as BLKmode at the final offset with the alignment and
   remaining size the main loop may now assume.  */
17928 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17929 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17930 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17932 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17936 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   Returns the stringop_alg to use; may set *DYNAMIC_CHECK to a size
   threshold above which a library call should be emitted at runtime.
   NOTE(review): listing is decimated -- braces, `else` lines and several
   statements between the numbered lines are missing.  */
17937 static enum stringop_alg
17938 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17939 int *dynamic_check)
17941 const struct stringop_algs * algs;
17942 bool optimize_for_speed;
17943 /* Algorithms using the rep prefix want at least edi and ecx;
17944 additionally, memset wants eax and memcpy wants esi. Don't
17945 consider such algorithms if the user has appropriated those
17946 registers for their own purposes. */
17947 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17949 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17951 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17952 || (alg != rep_prefix_1_byte \
17953 && alg != rep_prefix_4_byte \
17954 && alg != rep_prefix_8_byte))
17955 const struct processor_costs *cost;
17957 /* Even if the string operation call is cold, we still might spend a lot
17958 of time processing large blocks. */
17959 if (optimize_function_for_size_p (cfun)
17960 || (optimize_insn_for_size_p ()
17961 && expected_size != -1 && expected_size < 256))
17962 optimize_for_speed = false;
17964 optimize_for_speed = true;
17966 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17968 *dynamic_check = -1;
/* Pick the per-CPU cost table: memset vs. memcpy, 32- vs. 64-bit.  */
17970 algs = &cost->memset[TARGET_64BIT != 0];
17972 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy choice wins when it is usable.  */
17973 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17974 return stringop_alg;
17975 /* rep; movq or rep; movl is the smallest variant. */
17976 else if (!optimize_for_speed)
17978 if (!count || (count & 3))
17979 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17981 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17983 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17985 else if (expected_size != -1 && expected_size < 4)
17986 return loop_1_byte;
17987 else if (expected_size != -1)
17990 enum stringop_alg alg = libcall;
/* NOTE(review): NAX_STRINGOP_ALGS looks like a typo for MAX_STRINGOP_ALGS;
   verify against the declaration of struct stringop_algs.  */
17991 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17993 /* We get here if the algorithms that were not libcall-based
17994 were rep-prefix based and we are unable to use rep prefixes
17995 based on global register usage. Break out of the loop and
17996 use the heuristic below. */
17997 if (algs->size[i].max == 0)
17999 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18001 enum stringop_alg candidate = algs->size[i].alg;
18003 if (candidate != libcall && ALG_USABLE_P (candidate))
18005 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18006 last non-libcall inline algorithm. */
18007 if (TARGET_INLINE_ALL_STRINGOPS)
18009 /* When the current size is best to be copied by a libcall,
18010 but we are still forced to inline, run the heuristic below
18011 that will pick code for medium sized blocks. */
18012 if (alg != libcall)
18016 else if (ALG_USABLE_P (candidate))
18020 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18022 /* When asked to inline the call anyway, try to pick meaningful choice.
18023 We look for maximal size of block that is faster to copy by hand and
18024 take blocks of at most of that size guessing that average size will
18025 be roughly half of the block.
18027 If this turns out to be bad, we might simply specify the preferred
18028 choice in ix86_costs. */
18029 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18030 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18033 enum stringop_alg alg;
18035 bool any_alg_usable_p = true;
18037 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18039 enum stringop_alg candidate = algs->size[i].alg;
18040 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18042 if (candidate != libcall && candidate
18043 && ALG_USABLE_P (candidate))
18044 max = algs->size[i].max;
18046 /* If there aren't any usable algorithms, then recursing on
18047 smaller sizes isn't going to find anything. Just return the
18048 simple byte-at-a-time copy loop. */
18049 if (!any_alg_usable_p)
18051 /* Pick something reasonable. */
18052 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18053 *dynamic_check = 128;
18054 return loop_1_byte;
/* Recurse with half the maximal inline-profitable size as the expected
   size; the recursion must settle on a concrete inline algorithm.  */
18058 alg = decide_alg (count, max / 2, memset, dynamic_check);
18059 gcc_assert (*dynamic_check == -1);
18060 gcc_assert (alg != libcall);
18061 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18062 *dynamic_check = max;
18065 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18066 #undef ALG_USABLE_P
18069 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18070 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the destination alignment the prologue should establish for the
   chosen ALG.  (Listing is decimated: the switch head, `break`s and several
   case bodies are missing between the numbered lines.)  */
18072 decide_alignment (int align,
18073 enum stringop_alg alg,
18076 int desired_align = 0;
18080 gcc_unreachable ();
18082 case unrolled_loop:
18083 desired_align = GET_MODE_SIZE (Pmode);
18085 case rep_prefix_8_byte:
18088 case rep_prefix_4_byte:
18089 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18090 copying whole cacheline at once. */
18091 if (TARGET_PENTIUMPRO)
18096 case rep_prefix_1_byte:
18097 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18098 copying whole cacheline at once. */
18099 if (TARGET_PENTIUMPRO)
/* Never ask for less alignment than we already have, and skip the
   alignment prologue entirely for very small known sizes.  */
18113 if (desired_align < align)
18114 desired_align = align;
18115 if (expected_size != -1 && expected_size < 4)
18116 desired_align = align;
18117 return desired_align;
18120 /* Return the smallest power of 2 greater than VAL. */
18122 smallest_pow2_greater_than (int val)
/* NOTE(review): the body of this function (original lines 18123-18129) is
   absent from this listing; only the comment and signature survive.  */
18130 /* Expand string move (memcpy) operation. Use i386 string operations when
18131 profitable. expand_setmem contains similar code. The code depends upon
18132 architecture, block size and alignment, but always has the same
18135 1) Prologue guard: Conditional that jumps up to epilogues for small
18136 blocks that can be handled by epilogue alone. This is faster but
18137 also needed for correctness, since prologue assume the block is larger
18138 than the desired alignment.
18140 Optional dynamic check for size and libcall for large
18141 blocks is emitted here too, with -minline-stringops-dynamically.
18143 2) Prologue: copy first few bytes in order to get destination aligned
18144 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18145 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18146 We emit either a jump tree on power of two sized blocks, or a byte loop.
18148 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18149 with specified algorithm.
18151 4) Epilogue: code copying tail of the block that is too small to be
18152 handled by main body (or up to size guarded by prologue guard). */
/* Expand a memcpy-style block move of COUNT_EXP bytes from SRC to DST.
   See the comment block above for the four-step structure (guard, alignment
   prologue, main loop, epilogue).  NOTE(review): this listing is decimated --
   the return type, braces, `case` labels, `break`s and many statements are
   missing between the numbered lines.  */
18155 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18156 rtx expected_align_exp, rtx expected_size_exp)
18162 rtx jump_around_label = NULL;
18163 HOST_WIDE_INT align = 1;
18164 unsigned HOST_WIDE_INT count = 0;
18165 HOST_WIDE_INT expected_size = -1;
18166 int size_needed = 0, epilogue_size_needed;
18167 int desired_align = 0, align_bytes = 0;
18168 enum stringop_alg alg;
18170 bool need_zero_guard = false;
18172 if (CONST_INT_P (align_exp))
18173 align = INTVAL (align_exp);
18174 /* i386 can do misaligned access on reasonably increased cost. */
18175 if (CONST_INT_P (expected_align_exp)
18176 && INTVAL (expected_align_exp) > align)
18177 align = INTVAL (expected_align_exp);
18178 /* ALIGN is the minimum of destination and source alignment, but we care here
18179 just about destination alignment. */
18180 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18181 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18183 if (CONST_INT_P (count_exp))
18184 count = expected_size = INTVAL (count_exp);
18185 if (CONST_INT_P (expected_size_exp) && count == 0)
18186 expected_size = INTVAL (expected_size_exp);
18188 /* Make sure we don't need to care about overflow later on. */
18189 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18192 /* Step 0: Decide on preferred algorithm, desired alignment and
18193 size of chunks to be copied by main loop. */
18195 alg = decide_alg (count, expected_size, false, &dynamic_check);
18196 desired_align = decide_alignment (align, alg, expected_size);
18198 if (!TARGET_ALIGN_STRINGOPS)
18199 align = desired_align;
18201 if (alg == libcall)
18203 gcc_assert (alg != no_stringop);
18205 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18206 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18207 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; NEED_ZERO_GUARD marks loop algorithms that
   would misbehave on a zero remaining count.  */
18212 gcc_unreachable ();
18214 need_zero_guard = true;
18215 size_needed = GET_MODE_SIZE (Pmode);
18217 case unrolled_loop:
18218 need_zero_guard = true;
18219 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18221 case rep_prefix_8_byte:
18224 case rep_prefix_4_byte:
18227 case rep_prefix_1_byte:
18231 need_zero_guard = true;
18236 epilogue_size_needed = size_needed;
18238 /* Step 1: Prologue guard. */
18240 /* Alignment code needs count to be in register. */
18241 if (CONST_INT_P (count_exp) && desired_align > align)
18243 if (INTVAL (count_exp) > desired_align
18244 && INTVAL (count_exp) > size_needed)
18247 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18248 if (align_bytes <= 0)
18251 align_bytes = desired_align - align_bytes;
18253 if (align_bytes == 0)
18254 count_exp = force_reg (counter_mode (count_exp), count_exp);
18256 gcc_assert (desired_align >= 1 && align >= 1);
18258 /* Ensure that alignment prologue won't copy past end of block. */
18259 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18261 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18262 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18263 Make sure it is power of 2. */
18264 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18268 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18270 /* If main algorithm works on QImode, no epilogue is needed.
18271 For small sizes just don't align anything. */
18272 if (size_needed == 1)
18273 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18280 label = gen_label_rtx ();
18281 emit_cmp_and_jump_insns (count_exp,
18282 GEN_INT (epilogue_size_needed),
18283 LTU, 0, counter_mode (count_exp), 1, label);
18284 if (expected_size == -1 || expected_size < epilogue_size_needed)
18285 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18287 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18291 /* Emit code to decide on runtime whether library call or inline should be
18293 if (dynamic_check != -1)
18295 if (CONST_INT_P (count_exp))
18297 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18299 emit_block_move_via_libcall (dst, src, count_exp, false);
18300 count_exp = const0_rtx;
18306 rtx hot_label = gen_label_rtx ();
18307 jump_around_label = gen_label_rtx ();
18308 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18309 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18310 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18311 emit_block_move_via_libcall (dst, src, count_exp, false);
18312 emit_jump (jump_around_label);
18313 emit_label (hot_label);
18317 /* Step 2: Alignment prologue. */
18319 if (desired_align > align)
18321 if (align_bytes == 0)
18323 /* Except for the first move in epilogue, we no longer know
18324 constant offset in aliasing info. It doesn't seem worth
18325 the pain to maintain it for the first move, so throw away
18327 src = change_address (src, BLKmode, srcreg);
18328 dst = change_address (dst, BLKmode, destreg);
18329 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18334 /* If we know how many bytes need to be stored before dst is
18335 sufficiently aligned, maintain aliasing info accurately. */
18336 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18337 desired_align, align_bytes);
18338 count_exp = plus_constant (count_exp, -align_bytes);
18339 count -= align_bytes;
18341 if (need_zero_guard
18342 && (count < (unsigned HOST_WIDE_INT) size_needed
18343 || (align_bytes == 0
18344 && count < ((unsigned HOST_WIDE_INT) size_needed
18345 + desired_align - align))))
18347 /* It is possible that we copied enough so the main loop will not
18349 gcc_assert (size_needed > 1);
18350 if (label == NULL_RTX)
18351 label = gen_label_rtx ();
18352 emit_cmp_and_jump_insns (count_exp,
18353 GEN_INT (size_needed),
18354 LTU, 0, counter_mode (count_exp), 1, label);
18355 if (expected_size == -1
18356 || expected_size < (desired_align - align) / 2 + size_needed)
18357 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18359 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18362 if (label && size_needed == 1)
18364 emit_label (label);
18365 LABEL_NUSES (label) = 1;
18367 epilogue_size_needed = 1;
18369 else if (label == NULL_RTX)
18370 epilogue_size_needed = size_needed;
18372 /* Step 3: Main loop. */
18378 gcc_unreachable ();
18380 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18381 count_exp, QImode, 1, expected_size);
18384 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18385 count_exp, Pmode, 1, expected_size);
18387 case unrolled_loop:
18388 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18389 registers for 4 temporaries anyway. */
18390 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18391 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18394 case rep_prefix_8_byte:
18395 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18398 case rep_prefix_4_byte:
18399 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18402 case rep_prefix_1_byte:
18403 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18407 /* Adjust properly the offset of src and dest memory for aliasing. */
18408 if (CONST_INT_P (count_exp))
18410 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18411 (count / size_needed) * size_needed);
18412 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18413 (count / size_needed) * size_needed);
18417 src = change_address (src, BLKmode, srcreg);
18418 dst = change_address (dst, BLKmode, destreg);
18421 /* Step 4: Epilogue to copy the remaining bytes. */
18425 /* When the main loop is done, COUNT_EXP might hold original count,
18426 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18427 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18428 bytes. Compensate if needed. */
18430 if (size_needed < epilogue_size_needed)
18433 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18434 GEN_INT (size_needed - 1), count_exp, 1,
18436 if (tmp != count_exp)
18437 emit_move_insn (count_exp, tmp);
18439 emit_label (label);
18440 LABEL_NUSES (label) = 1;
18443 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18444 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18445 epilogue_size_needed);
18446 if (jump_around_label)
18447 emit_label (jump_around_label);
18451 /* Helper function for memcpy. For QImode value 0xXY produce
18452 0xXYXYXYXY of wide specified by MODE. This is essentially
18453 a * 0x10101010, but we can do slightly better than
18454 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): despite the comment, callers here are the setmem
   (memset) expansion paths -- the "memcpy" wording looks stale.  */
18457 promote_duplicated_reg (enum machine_mode mode, rtx val)
18459 enum machine_mode valmode = GET_MODE (val);
18461 int nops = mode == DImode ? 3 : 2;
18463 gcc_assert (mode == SImode || mode == DImode);
18464 if (val == const0_rtx)
18465 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: fold the replication at compile time.  */
18466 if (CONST_INT_P (val))
18468 HOST_WIDE_INT v = INTVAL (val) & 255;
18472 if (mode == DImode)
18473 v |= (v << 16) << 16;
18474 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18477 if (valmode == VOIDmode)
18479 if (valmode != QImode)
18480 val = gen_lowpart (QImode, val);
18481 if (mode == QImode)
/* Choose between a multiply by the replicated-1 constant and a manual
   shift/or sequence, based on the per-CPU cost tables.  */
18483 if (!TARGET_PARTIAL_REG_STALL)
18485 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18486 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18487 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18488 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18490 rtx reg = convert_modes (mode, QImode, val, true);
18491 tmp = promote_duplicated_reg (mode, const1_rtx);
18492 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18497 rtx reg = convert_modes (mode, QImode, val, true);
18499 if (!TARGET_PARTIAL_REG_STALL)
18500 if (mode == SImode)
18501 emit_insn (gen_movsi_insv_1 (reg, reg));
18503 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18506 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18507 NULL, 1, OPTAB_DIRECT);
18509 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18511 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18512 NULL, 1, OPTAB_DIRECT);
18513 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): original line 18515 (presumably the SImode early
   `return reg;`) is missing from this listing -- the DImode-only
   32-bit shift below would otherwise run for SImode too.  */
18514 if (mode == SImode)
18516 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18517 NULL, 1, OPTAB_DIRECT);
18518 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18523 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18524 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18525 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) that either the main loop's chunk size
   or the alignment prologue can consume; falls through to VAL unchanged
   when only byte stores are needed.  (Listing is decimated: the TARGET_64BIT
   condition line preceding 18532 is missing.)  */
18527 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18532 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18533 promoted_val = promote_duplicated_reg (DImode, val);
18534 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18535 promoted_val = promote_duplicated_reg (SImode, val);
18536 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18537 promoted_val = promote_duplicated_reg (HImode, val);
18539 promoted_val = val;
18541 return promoted_val;
18544 /* Expand string clear operation (bzero). Use i386 string operations when
18545 profitable. See expand_movmem comment for explanation of individual
18546 steps performed. */
/* memset counterpart of ix86_expand_movmem; same four-step structure but
   with VAL_EXP promotion to a wide replicated register.  NOTE(review):
   listing is decimated -- return type, braces, `case` labels, `break`s and
   many statements are missing between the numbered lines.  */
18548 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18549 rtx expected_align_exp, rtx expected_size_exp)
18554 rtx jump_around_label = NULL;
18555 HOST_WIDE_INT align = 1;
18556 unsigned HOST_WIDE_INT count = 0;
18557 HOST_WIDE_INT expected_size = -1;
18558 int size_needed = 0, epilogue_size_needed;
18559 int desired_align = 0, align_bytes = 0;
18560 enum stringop_alg alg;
18561 rtx promoted_val = NULL;
18562 bool force_loopy_epilogue = false;
18564 bool need_zero_guard = false;
18566 if (CONST_INT_P (align_exp))
18567 align = INTVAL (align_exp);
18568 /* i386 can do misaligned access on reasonably increased cost. */
18569 if (CONST_INT_P (expected_align_exp)
18570 && INTVAL (expected_align_exp) > align)
18571 align = INTVAL (expected_align_exp);
18572 if (CONST_INT_P (count_exp))
18573 count = expected_size = INTVAL (count_exp);
18574 if (CONST_INT_P (expected_size_exp) && count == 0)
18575 expected_size = INTVAL (expected_size_exp);
18577 /* Make sure we don't need to care about overflow later on. */
18578 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18581 /* Step 0: Decide on preferred algorithm, desired alignment and
18582 size of chunks to be copied by main loop. */
18584 alg = decide_alg (count, expected_size, true, &dynamic_check);
18585 desired_align = decide_alignment (align, alg, expected_size);
18587 if (!TARGET_ALIGN_STRINGOPS)
18588 align = desired_align;
18590 if (alg == libcall)
18592 gcc_assert (alg != no_stringop);
18594 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18595 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18600 gcc_unreachable ();
18602 need_zero_guard = true;
18603 size_needed = GET_MODE_SIZE (Pmode);
18605 case unrolled_loop:
18606 need_zero_guard = true;
18607 size_needed = GET_MODE_SIZE (Pmode) * 4;
18609 case rep_prefix_8_byte:
18612 case rep_prefix_4_byte:
18615 case rep_prefix_1_byte:
18619 need_zero_guard = true;
18623 epilogue_size_needed = size_needed;
18625 /* Step 1: Prologue guard. */
18627 /* Alignment code needs count to be in register. */
18628 if (CONST_INT_P (count_exp) && desired_align > align)
18630 if (INTVAL (count_exp) > desired_align
18631 && INTVAL (count_exp) > size_needed)
18634 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18635 if (align_bytes <= 0)
18638 align_bytes = desired_align - align_bytes;
18640 if (align_bytes == 0)
18642 enum machine_mode mode = SImode;
18643 if (TARGET_64BIT && (count & ~0xffffffff))
18645 count_exp = force_reg (mode, count_exp);
18648 /* Do the cheap promotion to allow better CSE across the
18649 main loop and epilogue (ie one load of the big constant in the
18650 front of all code. */
18651 if (CONST_INT_P (val_exp))
18652 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18653 desired_align, align);
18654 /* Ensure that alignment prologue won't copy past end of block. */
18655 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18657 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18658 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18659 Make sure it is power of 2. */
18660 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18662 /* To improve performance of small blocks, we jump around the VAL
18663 promoting mode. This means that if the promoted VAL is not constant,
18664 we might not use it in the epilogue and have to use byte
18666 if (epilogue_size_needed > 2 && !promoted_val)
18667 force_loopy_epilogue = true;
18670 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18672 /* If main algorithm works on QImode, no epilogue is needed.
18673 For small sizes just don't align anything. */
18674 if (size_needed == 1)
18675 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18682 label = gen_label_rtx ();
18683 emit_cmp_and_jump_insns (count_exp,
18684 GEN_INT (epilogue_size_needed),
18685 LTU, 0, counter_mode (count_exp), 1, label);
18686 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18687 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18689 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18692 if (dynamic_check != -1)
18694 rtx hot_label = gen_label_rtx ();
18695 jump_around_label = gen_label_rtx ();
18696 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18697 LEU, 0, counter_mode (count_exp), 1, hot_label);
18698 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18699 set_storage_via_libcall (dst, count_exp, val_exp, false);
18700 emit_jump (jump_around_label);
18701 emit_label (hot_label);
18704 /* Step 2: Alignment prologue. */
18706 /* Do the expensive promotion once we branched off the small blocks. */
18708 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18709 desired_align, align);
18710 gcc_assert (desired_align >= 1 && align >= 1);
18712 if (desired_align > align)
18714 if (align_bytes == 0)
18716 /* Except for the first move in epilogue, we no longer know
18717 constant offset in aliasing info. It doesn't seem worth
18718 the pain to maintain it for the first move, so throw away
18720 dst = change_address (dst, BLKmode, destreg);
18721 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18726 /* If we know how many bytes need to be stored before dst is
18727 sufficiently aligned, maintain aliasing info accurately. */
18728 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18729 desired_align, align_bytes);
18730 count_exp = plus_constant (count_exp, -align_bytes);
18731 count -= align_bytes;
18733 if (need_zero_guard
18734 && (count < (unsigned HOST_WIDE_INT) size_needed
18735 || (align_bytes == 0
18736 && count < ((unsigned HOST_WIDE_INT) size_needed
18737 + desired_align - align))))
18739 /* It is possible that we copied enough so the main loop will not
18741 gcc_assert (size_needed > 1);
18742 if (label == NULL_RTX)
18743 label = gen_label_rtx ();
18744 emit_cmp_and_jump_insns (count_exp,
18745 GEN_INT (size_needed),
18746 LTU, 0, counter_mode (count_exp), 1, label);
18747 if (expected_size == -1
18748 || expected_size < (desired_align - align) / 2 + size_needed)
18749 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18751 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18754 if (label && size_needed == 1)
18756 emit_label (label);
18757 LABEL_NUSES (label) = 1;
/* Byte-sized main loop: the unpromoted value suffices.  */
18759 promoted_val = val_exp;
18760 epilogue_size_needed = 1;
18762 else if (label == NULL_RTX)
18763 epilogue_size_needed = size_needed;
18765 /* Step 3: Main loop. */
18771 gcc_unreachable ();
18773 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18774 count_exp, QImode, 1, expected_size);
18777 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18778 count_exp, Pmode, 1, expected_size);
18780 case unrolled_loop:
18781 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18782 count_exp, Pmode, 4, expected_size);
18784 case rep_prefix_8_byte:
18785 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18788 case rep_prefix_4_byte:
18789 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18792 case rep_prefix_1_byte:
18793 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18797 /* Adjust properly the offset of src and dest memory for aliasing. */
18798 if (CONST_INT_P (count_exp))
18799 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18800 (count / size_needed) * size_needed);
18802 dst = change_address (dst, BLKmode, destreg);
18804 /* Step 4: Epilogue to copy the remaining bytes. */
18808 /* When the main loop is done, COUNT_EXP might hold original count,
18809 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18810 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18811 bytes. Compensate if needed. */
18813 if (size_needed < epilogue_size_needed)
18816 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18817 GEN_INT (size_needed - 1), count_exp, 1,
18819 if (tmp != count_exp)
18820 emit_move_insn (count_exp, tmp);
18822 emit_label (label);
18823 LABEL_NUSES (label) = 1;
18826 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18828 if (force_loopy_epilogue)
18829 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18830 epilogue_size_needed);
18832 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18833 epilogue_size_needed);
18835 if (jump_around_label)
18836 emit_label (jump_around_label);
18840 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb:
18843 out = result, initialized with the start address
18844 align_rtx = alignment of the address.
18845 scratch = scratch register, initialized with the startaddress when
18846 not aligned, otherwise undefined
18848 This is just the body. It needs the initializations mentioned above and
18849 some address computing at the end. These things are done in i386.md. */
18852 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18856 rtx align_2_label = NULL_RTX;
18857 rtx align_3_label = NULL_RTX;
18858 rtx align_4_label = gen_label_rtx ();
18859 rtx end_0_label = gen_label_rtx ();
18861 rtx tmpreg = gen_reg_rtx (SImode);
18862 rtx scratch = gen_reg_rtx (SImode);
18866 if (CONST_INT_P (align_rtx))
18867 align = INTVAL (align_rtx);
18869 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18871 /* Is there a known alignment and is it less than 4? */
18874 rtx scratch1 = gen_reg_rtx (Pmode);
18875 emit_move_insn (scratch1, out);
18876 /* Is there a known alignment and is it not 2? */
18879 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18880 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18882 /* Leave just the 3 lower bits. */
18883 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18884 NULL_RTX, 0, OPTAB_WIDEN);
18886 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18887 Pmode, 1, align_4_label);
18888 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18889 Pmode, 1, align_2_label);
18890 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18891 Pmode, 1, align_3_label);
18895 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18896 check if is aligned to 4 - byte. */
18898 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18899 NULL_RTX, 0, OPTAB_WIDEN);
18901 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18902 Pmode, 1, align_4_label);
18905 mem = change_address (src, QImode, out);
18907 /* Now compare the bytes. */
18909 /* Compare the first n unaligned byte on a byte per byte basis. */
18910 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18911 QImode, 1, end_0_label);
18913 /* Increment the address. */
18914 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18916 /* Not needed with an alignment of 2 */
18919 emit_label (align_2_label);
18921 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18924 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18926 emit_label (align_3_label);
18929 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18932 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18935 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18936 align this loop. It gives only huge programs, but does not help to
18938 emit_label (align_4_label);
18940 mem = change_address (src, SImode, out);
18941 emit_move_insn (scratch, mem);
18942 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18944 /* This formula yields a nonzero result iff one of the bytes is zero.
18945 This saves three branches inside loop and many cycles. */
18947 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18948 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18949 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18950 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18951 gen_int_mode (0x80808080, SImode)));
18952 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18957 rtx reg = gen_reg_rtx (SImode);
18958 rtx reg2 = gen_reg_rtx (Pmode);
18959 emit_move_insn (reg, tmpreg);
18960 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18962 /* If zero is not in the first two bytes, move two bytes forward. */
18963 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18964 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18965 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18966 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18967 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18970 /* Emit lea manually to avoid clobbering of flags. */
18971 emit_insn (gen_rtx_SET (SImode, reg2,
18972 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18974 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18975 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18976 emit_insn (gen_rtx_SET (VOIDmode, out,
18977 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18984 rtx end_2_label = gen_label_rtx ();
18985 /* Is zero in the first two bytes? */
18987 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18988 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18989 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18990 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18991 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18993 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18994 JUMP_LABEL (tmp) = end_2_label;
18996 /* Not in the first two. Move two bytes forward. */
18997 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18998 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19000 emit_label (end_2_label);
19004 /* Avoid branch in fixing the byte. */
19005 tmpreg = gen_lowpart (QImode, tmpreg);
19006 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19007 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19008 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19010 emit_label (end_0_label);
19013 /* Expand strlen. */
19016 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19018 rtx addr, scratch1, scratch2, scratch3, scratch4;
19020 /* The generic case of strlen expander is long. Avoid it's
19021 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19023 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19024 && !TARGET_INLINE_ALL_STRINGOPS
19025 && !optimize_insn_for_size_p ()
19026 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19029 addr = force_reg (Pmode, XEXP (src, 0));
19030 scratch1 = gen_reg_rtx (Pmode);
19032 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19033 && !optimize_insn_for_size_p ())
19035 /* Well it seems that some optimizer does not combine a call like
19036 foo(strlen(bar), strlen(bar));
19037 when the move and the subtraction is done here. It does calculate
19038 the length just once when these instructions are done inside of
19039 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19040 often used and I use one fewer register for the lifetime of
19041 output_strlen_unroll() this is better. */
19043 emit_move_insn (out, addr);
19045 ix86_expand_strlensi_unroll_1 (out, src, align);
19047 /* strlensi_unroll_1 returns the address of the zero at the end of
19048 the string, like memchr(), so compute the length by subtracting
19049 the start address. */
19050 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19056 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19057 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19060 scratch2 = gen_reg_rtx (Pmode);
19061 scratch3 = gen_reg_rtx (Pmode);
19062 scratch4 = force_reg (Pmode, constm1_rtx);
19064 emit_move_insn (scratch3, addr);
19065 eoschar = force_reg (QImode, eoschar);
19067 src = replace_equiv_address_nv (src, scratch3);
19069 /* If .md starts supporting :P, this can be done in .md. */
19070 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19071 scratch4), UNSPEC_SCAS);
19072 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19073 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19074 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19079 /* For given symbol (function) construct code to compute address of it's PLT
19080 entry in large x86-64 PIC model. */
19082 construct_plt_address (rtx symbol)
19084 rtx tmp = gen_reg_rtx (Pmode);
19085 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19087 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19088 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19090 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19091 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19096 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19098 rtx pop, int sibcall)
19100 rtx use = NULL, call;
19102 if (pop == const0_rtx)
19104 gcc_assert (!TARGET_64BIT || !pop);
19106 if (TARGET_MACHO && !TARGET_64BIT)
19109 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19110 fnaddr = machopic_indirect_call_target (fnaddr);
19115 /* Static functions and indirect calls don't need the pic register. */
19116 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19117 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19118 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19119 use_reg (&use, pic_offset_table_rtx);
19122 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19124 rtx al = gen_rtx_REG (QImode, AX_REG);
19125 emit_move_insn (al, callarg2);
19126 use_reg (&use, al);
19129 if (ix86_cmodel == CM_LARGE_PIC
19130 && GET_CODE (fnaddr) == MEM
19131 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19132 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19133 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19134 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19136 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19137 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19139 if (sibcall && TARGET_64BIT
19140 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19143 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19144 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19145 emit_move_insn (fnaddr, addr);
19146 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19149 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19151 call = gen_rtx_SET (VOIDmode, retval, call);
19154 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19155 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19156 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19159 && ix86_cfun_abi () == MS_ABI
19160 && (!callarg2 || INTVAL (callarg2) != -2))
19162 /* We need to represent that SI and DI registers are clobbered
19164 static int clobbered_registers[] = {
19165 XMM6_REG, XMM7_REG, XMM8_REG,
19166 XMM9_REG, XMM10_REG, XMM11_REG,
19167 XMM12_REG, XMM13_REG, XMM14_REG,
19168 XMM15_REG, SI_REG, DI_REG
19171 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19172 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19173 UNSPEC_MS_TO_SYSV_CALL);
19177 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19178 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19181 (SSE_REGNO_P (clobbered_registers[i])
19183 clobbered_registers[i]));
19185 call = gen_rtx_PARALLEL (VOIDmode,
19186 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19190 call = emit_call_insn (call);
19192 CALL_INSN_FUNCTION_USAGE (call) = use;
19196 /* Clear stack slot assignments remembered from previous functions.
19197 This is called from INIT_EXPANDERS once before RTL is emitted for each
19200 static struct machine_function *
19201 ix86_init_machine_status (void)
19203 struct machine_function *f;
19205 f = GGC_CNEW (struct machine_function);
19206 f->use_fast_prologue_epilogue_nregs = -1;
19207 f->tls_descriptor_call_expanded_p = 0;
19208 f->call_abi = ix86_abi;
19213 /* Return a MEM corresponding to a stack slot with mode MODE.
19214 Allocate a new slot if necessary.
19216 The RTL for a function can have several slots available: N is
19217 which slot to use. */
19220 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19222 struct stack_local_entry *s;
19224 gcc_assert (n < MAX_386_STACK_LOCALS);
19226 /* Virtual slot is valid only before vregs are instantiated. */
19227 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19229 for (s = ix86_stack_locals; s; s = s->next)
19230 if (s->mode == mode && s->n == n)
19231 return copy_rtx (s->rtl);
19233 s = (struct stack_local_entry *)
19234 ggc_alloc (sizeof (struct stack_local_entry));
19237 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19239 s->next = ix86_stack_locals;
19240 ix86_stack_locals = s;
19244 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19246 static GTY(()) rtx ix86_tls_symbol;
19248 ix86_tls_get_addr (void)
19251 if (!ix86_tls_symbol)
19253 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19254 (TARGET_ANY_GNU_TLS
19256 ? "___tls_get_addr"
19257 : "__tls_get_addr");
19260 return ix86_tls_symbol;
19263 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19265 static GTY(()) rtx ix86_tls_module_base_symbol;
19267 ix86_tls_module_base (void)
19270 if (!ix86_tls_module_base_symbol)
19272 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19273 "_TLS_MODULE_BASE_");
19274 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19275 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19278 return ix86_tls_module_base_symbol;
19281 /* Calculate the length of the memory address in the instruction
19282 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19285 memory_address_length (rtx addr)
19287 struct ix86_address parts;
19288 rtx base, index, disp;
19292 if (GET_CODE (addr) == PRE_DEC
19293 || GET_CODE (addr) == POST_INC
19294 || GET_CODE (addr) == PRE_MODIFY
19295 || GET_CODE (addr) == POST_MODIFY)
19298 ok = ix86_decompose_address (addr, &parts);
19301 if (parts.base && GET_CODE (parts.base) == SUBREG)
19302 parts.base = SUBREG_REG (parts.base);
19303 if (parts.index && GET_CODE (parts.index) == SUBREG)
19304 parts.index = SUBREG_REG (parts.index);
19307 index = parts.index;
19312 - esp as the base always wants an index,
19313 - ebp as the base always wants a displacement. */
19315 /* Register Indirect. */
19316 if (base && !index && !disp)
19318 /* esp (for its index) and ebp (for its displacement) need
19319 the two-byte modrm form. */
19320 if (addr == stack_pointer_rtx
19321 || addr == arg_pointer_rtx
19322 || addr == frame_pointer_rtx
19323 || addr == hard_frame_pointer_rtx)
19327 /* Direct Addressing. */
19328 else if (disp && !base && !index)
19333 /* Find the length of the displacement constant. */
19336 if (base && satisfies_constraint_K (disp))
19341 /* ebp always wants a displacement. */
19342 else if (base == hard_frame_pointer_rtx)
19345 /* An index requires the two-byte modrm form.... */
19347 /* ...like esp, which always wants an index. */
19348 || base == stack_pointer_rtx
19349 || base == arg_pointer_rtx
19350 || base == frame_pointer_rtx)
19357 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19358 is set, expect that insn have 8bit immediate alternative. */
19360 ix86_attr_length_immediate_default (rtx insn, int shortform)
19364 extract_insn_cached (insn);
19365 for (i = recog_data.n_operands - 1; i >= 0; --i)
19366 if (CONSTANT_P (recog_data.operand[i]))
19369 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19373 switch (get_attr_mode (insn))
19384 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19389 fatal_insn ("unknown insn mode", insn);
19395 /* Compute default value for "length_address" attribute. */
19397 ix86_attr_length_address_default (rtx insn)
19401 if (get_attr_type (insn) == TYPE_LEA)
19403 rtx set = PATTERN (insn);
19405 if (GET_CODE (set) == PARALLEL)
19406 set = XVECEXP (set, 0, 0);
19408 gcc_assert (GET_CODE (set) == SET);
19410 return memory_address_length (SET_SRC (set));
19413 extract_insn_cached (insn);
19414 for (i = recog_data.n_operands - 1; i >= 0; --i)
19415 if (MEM_P (recog_data.operand[i]))
19417 return memory_address_length (XEXP (recog_data.operand[i], 0));
19423 /* Compute default value for "length_vex" attribute. It includes
19424 2 or 3 byte VEX prefix and 1 opcode byte. */
19427 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19432 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19433 byte VEX prefix. */
19434 if (!has_0f_opcode || has_vex_w)
19437 /* We can always use 2 byte VEX prefix in 32bit. */
19441 extract_insn_cached (insn);
19443 for (i = recog_data.n_operands - 1; i >= 0; --i)
19444 if (REG_P (recog_data.operand[i]))
19446 /* REX.W bit uses 3 byte VEX prefix. */
19447 if (GET_MODE (recog_data.operand[i]) == DImode)
19452 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19453 if (MEM_P (recog_data.operand[i])
19454 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19461 /* Return the maximum number of instructions a cpu can issue. */
19464 ix86_issue_rate (void)
19468 case PROCESSOR_PENTIUM:
19469 case PROCESSOR_ATOM:
19473 case PROCESSOR_PENTIUMPRO:
19474 case PROCESSOR_PENTIUM4:
19475 case PROCESSOR_ATHLON:
19477 case PROCESSOR_AMDFAM10:
19478 case PROCESSOR_NOCONA:
19479 case PROCESSOR_GENERIC32:
19480 case PROCESSOR_GENERIC64:
19483 case PROCESSOR_CORE2:
19491 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19492 by DEP_INSN and nothing set by DEP_INSN. */
19495 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19499 /* Simplify the test for uninteresting insns. */
19500 if (insn_type != TYPE_SETCC
19501 && insn_type != TYPE_ICMOV
19502 && insn_type != TYPE_FCMOV
19503 && insn_type != TYPE_IBR)
19506 if ((set = single_set (dep_insn)) != 0)
19508 set = SET_DEST (set);
19511 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19512 && XVECLEN (PATTERN (dep_insn), 0) == 2
19513 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19514 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19516 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19517 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19522 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19525 /* This test is true if the dependent insn reads the flags but
19526 not any other potentially set register. */
19527 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19530 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19536 /* Return true iff USE_INSN has a memory address with operands set by
19540 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19543 extract_insn_cached (use_insn);
19544 for (i = recog_data.n_operands - 1; i >= 0; --i)
19545 if (MEM_P (recog_data.operand[i]))
19547 rtx addr = XEXP (recog_data.operand[i], 0);
19548 return modified_in_p (addr, set_insn) != 0;
19554 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19556 enum attr_type insn_type, dep_insn_type;
19557 enum attr_memory memory;
19559 int dep_insn_code_number;
19561 /* Anti and output dependencies have zero cost on all CPUs. */
19562 if (REG_NOTE_KIND (link) != 0)
19565 dep_insn_code_number = recog_memoized (dep_insn);
19567 /* If we can't recognize the insns, we can't really do anything. */
19568 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19571 insn_type = get_attr_type (insn);
19572 dep_insn_type = get_attr_type (dep_insn);
19576 case PROCESSOR_PENTIUM:
19577 /* Address Generation Interlock adds a cycle of latency. */
19578 if (insn_type == TYPE_LEA)
19580 rtx addr = PATTERN (insn);
19582 if (GET_CODE (addr) == PARALLEL)
19583 addr = XVECEXP (addr, 0, 0);
19585 gcc_assert (GET_CODE (addr) == SET);
19587 addr = SET_SRC (addr);
19588 if (modified_in_p (addr, dep_insn))
19591 else if (ix86_agi_dependent (dep_insn, insn))
19594 /* ??? Compares pair with jump/setcc. */
19595 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19598 /* Floating point stores require value to be ready one cycle earlier. */
19599 if (insn_type == TYPE_FMOV
19600 && get_attr_memory (insn) == MEMORY_STORE
19601 && !ix86_agi_dependent (dep_insn, insn))
19605 case PROCESSOR_PENTIUMPRO:
19606 memory = get_attr_memory (insn);
19608 /* INT->FP conversion is expensive. */
19609 if (get_attr_fp_int_src (dep_insn))
19612 /* There is one cycle extra latency between an FP op and a store. */
19613 if (insn_type == TYPE_FMOV
19614 && (set = single_set (dep_insn)) != NULL_RTX
19615 && (set2 = single_set (insn)) != NULL_RTX
19616 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19617 && MEM_P (SET_DEST (set2)))
19620 /* Show ability of reorder buffer to hide latency of load by executing
19621 in parallel with previous instruction in case
19622 previous instruction is not needed to compute the address. */
19623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19624 && !ix86_agi_dependent (dep_insn, insn))
19626 /* Claim moves to take one cycle, as core can issue one load
19627 at time and the next load can start cycle later. */
19628 if (dep_insn_type == TYPE_IMOV
19629 || dep_insn_type == TYPE_FMOV)
19637 memory = get_attr_memory (insn);
19639 /* The esp dependency is resolved before the instruction is really
19641 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19642 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19645 /* INT->FP conversion is expensive. */
19646 if (get_attr_fp_int_src (dep_insn))
19649 /* Show ability of reorder buffer to hide latency of load by executing
19650 in parallel with previous instruction in case
19651 previous instruction is not needed to compute the address. */
19652 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19653 && !ix86_agi_dependent (dep_insn, insn))
19655 /* Claim moves to take one cycle, as core can issue one load
19656 at time and the next load can start cycle later. */
19657 if (dep_insn_type == TYPE_IMOV
19658 || dep_insn_type == TYPE_FMOV)
19667 case PROCESSOR_ATHLON:
19669 case PROCESSOR_AMDFAM10:
19670 case PROCESSOR_ATOM:
19671 case PROCESSOR_GENERIC32:
19672 case PROCESSOR_GENERIC64:
19673 memory = get_attr_memory (insn);
19675 /* Show ability of reorder buffer to hide latency of load by executing
19676 in parallel with previous instruction in case
19677 previous instruction is not needed to compute the address. */
19678 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19679 && !ix86_agi_dependent (dep_insn, insn))
19681 enum attr_unit unit = get_attr_unit (insn);
19684 /* Because of the difference between the length of integer and
19685 floating unit pipeline preparation stages, the memory operands
19686 for floating point are cheaper.
19688 ??? For Athlon it the difference is most probably 2. */
19689 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19692 loadcost = TARGET_ATHLON ? 2 : 0;
19694 if (cost >= loadcost)
19707 /* How many alternative schedules to try. This should be as wide as the
19708 scheduling freedom in the DFA, but no wider. Making this value too
19709 large results extra work for the scheduler. */
19712 ia32_multipass_dfa_lookahead (void)
19716 case PROCESSOR_PENTIUM:
19719 case PROCESSOR_PENTIUMPRO:
19729 /* Compute the alignment given to a constant that is being placed in memory.
19730 EXP is the constant and ALIGN is the alignment that the object would
19732 The value of this function is used instead of that alignment to align
19736 ix86_constant_alignment (tree exp, int align)
19738 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19739 || TREE_CODE (exp) == INTEGER_CST)
19741 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19743 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19746 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19747 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19748 return BITS_PER_WORD;
19753 /* Compute the alignment for a static variable.
19754 TYPE is the data type, and ALIGN is the alignment that
19755 the object would ordinarily have. The value of this function is used
19756 instead of that alignment to align the object. */
19759 ix86_data_alignment (tree type, int align)
19761 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19763 if (AGGREGATE_TYPE_P (type)
19764 && TYPE_SIZE (type)
19765 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19766 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19767 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19768 && align < max_align)
19771 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19772 to 16byte boundary. */
19775 if (AGGREGATE_TYPE_P (type)
19776 && TYPE_SIZE (type)
19777 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19778 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19779 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19783 if (TREE_CODE (type) == ARRAY_TYPE)
19785 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19787 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19790 else if (TREE_CODE (type) == COMPLEX_TYPE)
19793 if (TYPE_MODE (type) == DCmode && align < 64)
19795 if ((TYPE_MODE (type) == XCmode
19796 || TYPE_MODE (type) == TCmode) && align < 128)
19799 else if ((TREE_CODE (type) == RECORD_TYPE
19800 || TREE_CODE (type) == UNION_TYPE
19801 || TREE_CODE (type) == QUAL_UNION_TYPE)
19802 && TYPE_FIELDS (type))
19804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19810 || TREE_CODE (type) == INTEGER_TYPE)
19812 if (TYPE_MODE (type) == DFmode && align < 64)
19814 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19821 /* Compute the alignment for a local variable or a stack slot. EXP is
19822 the data type or decl itself, MODE is the widest mode available and
19823 ALIGN is the alignment that the object would ordinarily have. The
19824 value of this macro is used instead of that alignment to align the
19828 ix86_local_alignment (tree exp, enum machine_mode mode,
19829 unsigned int align)
19833 if (exp && DECL_P (exp))
19835 type = TREE_TYPE (exp);
19844 /* Don't do dynamic stack realignment for long long objects with
19845 -mpreferred-stack-boundary=2. */
19848 && ix86_preferred_stack_boundary < 64
19849 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19850 && (!type || !TYPE_USER_ALIGN (type))
19851 && (!decl || !DECL_USER_ALIGN (decl)))
19854 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19855 register in MODE. We will return the largest alignment of XF
19859 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19860 align = GET_MODE_ALIGNMENT (DFmode);
19864 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19865 to 16byte boundary. */
19868 if (AGGREGATE_TYPE_P (type)
19869 && TYPE_SIZE (type)
19870 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19871 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19872 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19875 if (TREE_CODE (type) == ARRAY_TYPE)
19877 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19879 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19882 else if (TREE_CODE (type) == COMPLEX_TYPE)
19884 if (TYPE_MODE (type) == DCmode && align < 64)
19886 if ((TYPE_MODE (type) == XCmode
19887 || TYPE_MODE (type) == TCmode) && align < 128)
19890 else if ((TREE_CODE (type) == RECORD_TYPE
19891 || TREE_CODE (type) == UNION_TYPE
19892 || TREE_CODE (type) == QUAL_UNION_TYPE)
19893 && TYPE_FIELDS (type))
19895 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19897 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19900 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19901 || TREE_CODE (type) == INTEGER_TYPE)
19904 if (TYPE_MODE (type) == DFmode && align < 64)
19906 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19912 /* Emit RTL insns to initialize the variable parts of a trampoline.
19913 FNADDR is an RTX for the address of the function's pure code.
19914 CXT is an RTX for the static chain value for the function. */
19916 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19920 /* Compute offset from the end of the jmp to the target function. */
19921 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19922 plus_constant (tramp, 10),
19923 NULL_RTX, 1, OPTAB_DIRECT);
19924 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19925 gen_int_mode (0xb9, QImode));
19926 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19927 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19928 gen_int_mode (0xe9, QImode));
19929 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19934 /* Try to load address using shorter movl instead of movabs.
19935 We may want to support movq for kernel mode, but kernel does not use
19936 trampolines at the moment. */
19937 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19939 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19940 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19941 gen_int_mode (0xbb41, HImode));
19942 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19943 gen_lowpart (SImode, fnaddr));
19948 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19949 gen_int_mode (0xbb49, HImode));
19950 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19954 /* Load static chain using movabs to r10. */
19955 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19956 gen_int_mode (0xba49, HImode));
19957 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19960 /* Jump to the r11 */
19961 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19962 gen_int_mode (0xff49, HImode));
19963 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19964 gen_int_mode (0xe3, QImode));
19966 gcc_assert (offset <= TRAMPOLINE_SIZE);
19969 #ifdef ENABLE_EXECUTE_STACK
19970 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19971 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19975 /* Codes for all the SSE/MMX builtins. */
19978 IX86_BUILTIN_ADDPS,
19979 IX86_BUILTIN_ADDSS,
19980 IX86_BUILTIN_DIVPS,
19981 IX86_BUILTIN_DIVSS,
19982 IX86_BUILTIN_MULPS,
19983 IX86_BUILTIN_MULSS,
19984 IX86_BUILTIN_SUBPS,
19985 IX86_BUILTIN_SUBSS,
19987 IX86_BUILTIN_CMPEQPS,
19988 IX86_BUILTIN_CMPLTPS,
19989 IX86_BUILTIN_CMPLEPS,
19990 IX86_BUILTIN_CMPGTPS,
19991 IX86_BUILTIN_CMPGEPS,
19992 IX86_BUILTIN_CMPNEQPS,
19993 IX86_BUILTIN_CMPNLTPS,
19994 IX86_BUILTIN_CMPNLEPS,
19995 IX86_BUILTIN_CMPNGTPS,
19996 IX86_BUILTIN_CMPNGEPS,
19997 IX86_BUILTIN_CMPORDPS,
19998 IX86_BUILTIN_CMPUNORDPS,
19999 IX86_BUILTIN_CMPEQSS,
20000 IX86_BUILTIN_CMPLTSS,
20001 IX86_BUILTIN_CMPLESS,
20002 IX86_BUILTIN_CMPNEQSS,
20003 IX86_BUILTIN_CMPNLTSS,
20004 IX86_BUILTIN_CMPNLESS,
20005 IX86_BUILTIN_CMPNGTSS,
20006 IX86_BUILTIN_CMPNGESS,
20007 IX86_BUILTIN_CMPORDSS,
20008 IX86_BUILTIN_CMPUNORDSS,
20010 IX86_BUILTIN_COMIEQSS,
20011 IX86_BUILTIN_COMILTSS,
20012 IX86_BUILTIN_COMILESS,
20013 IX86_BUILTIN_COMIGTSS,
20014 IX86_BUILTIN_COMIGESS,
20015 IX86_BUILTIN_COMINEQSS,
20016 IX86_BUILTIN_UCOMIEQSS,
20017 IX86_BUILTIN_UCOMILTSS,
20018 IX86_BUILTIN_UCOMILESS,
20019 IX86_BUILTIN_UCOMIGTSS,
20020 IX86_BUILTIN_UCOMIGESS,
20021 IX86_BUILTIN_UCOMINEQSS,
20023 IX86_BUILTIN_CVTPI2PS,
20024 IX86_BUILTIN_CVTPS2PI,
20025 IX86_BUILTIN_CVTSI2SS,
20026 IX86_BUILTIN_CVTSI642SS,
20027 IX86_BUILTIN_CVTSS2SI,
20028 IX86_BUILTIN_CVTSS2SI64,
20029 IX86_BUILTIN_CVTTPS2PI,
20030 IX86_BUILTIN_CVTTSS2SI,
20031 IX86_BUILTIN_CVTTSS2SI64,
20033 IX86_BUILTIN_MAXPS,
20034 IX86_BUILTIN_MAXSS,
20035 IX86_BUILTIN_MINPS,
20036 IX86_BUILTIN_MINSS,
20038 IX86_BUILTIN_LOADUPS,
20039 IX86_BUILTIN_STOREUPS,
20040 IX86_BUILTIN_MOVSS,
20042 IX86_BUILTIN_MOVHLPS,
20043 IX86_BUILTIN_MOVLHPS,
20044 IX86_BUILTIN_LOADHPS,
20045 IX86_BUILTIN_LOADLPS,
20046 IX86_BUILTIN_STOREHPS,
20047 IX86_BUILTIN_STORELPS,
20049 IX86_BUILTIN_MASKMOVQ,
20050 IX86_BUILTIN_MOVMSKPS,
20051 IX86_BUILTIN_PMOVMSKB,
20053 IX86_BUILTIN_MOVNTPS,
20054 IX86_BUILTIN_MOVNTQ,
20056 IX86_BUILTIN_LOADDQU,
20057 IX86_BUILTIN_STOREDQU,
20059 IX86_BUILTIN_PACKSSWB,
20060 IX86_BUILTIN_PACKSSDW,
20061 IX86_BUILTIN_PACKUSWB,
20063 IX86_BUILTIN_PADDB,
20064 IX86_BUILTIN_PADDW,
20065 IX86_BUILTIN_PADDD,
20066 IX86_BUILTIN_PADDQ,
20067 IX86_BUILTIN_PADDSB,
20068 IX86_BUILTIN_PADDSW,
20069 IX86_BUILTIN_PADDUSB,
20070 IX86_BUILTIN_PADDUSW,
20071 IX86_BUILTIN_PSUBB,
20072 IX86_BUILTIN_PSUBW,
20073 IX86_BUILTIN_PSUBD,
20074 IX86_BUILTIN_PSUBQ,
20075 IX86_BUILTIN_PSUBSB,
20076 IX86_BUILTIN_PSUBSW,
20077 IX86_BUILTIN_PSUBUSB,
20078 IX86_BUILTIN_PSUBUSW,
20081 IX86_BUILTIN_PANDN,
20085 IX86_BUILTIN_PAVGB,
20086 IX86_BUILTIN_PAVGW,
20088 IX86_BUILTIN_PCMPEQB,
20089 IX86_BUILTIN_PCMPEQW,
20090 IX86_BUILTIN_PCMPEQD,
20091 IX86_BUILTIN_PCMPGTB,
20092 IX86_BUILTIN_PCMPGTW,
20093 IX86_BUILTIN_PCMPGTD,
20095 IX86_BUILTIN_PMADDWD,
20097 IX86_BUILTIN_PMAXSW,
20098 IX86_BUILTIN_PMAXUB,
20099 IX86_BUILTIN_PMINSW,
20100 IX86_BUILTIN_PMINUB,
20102 IX86_BUILTIN_PMULHUW,
20103 IX86_BUILTIN_PMULHW,
20104 IX86_BUILTIN_PMULLW,
20106 IX86_BUILTIN_PSADBW,
20107 IX86_BUILTIN_PSHUFW,
20109 IX86_BUILTIN_PSLLW,
20110 IX86_BUILTIN_PSLLD,
20111 IX86_BUILTIN_PSLLQ,
20112 IX86_BUILTIN_PSRAW,
20113 IX86_BUILTIN_PSRAD,
20114 IX86_BUILTIN_PSRLW,
20115 IX86_BUILTIN_PSRLD,
20116 IX86_BUILTIN_PSRLQ,
20117 IX86_BUILTIN_PSLLWI,
20118 IX86_BUILTIN_PSLLDI,
20119 IX86_BUILTIN_PSLLQI,
20120 IX86_BUILTIN_PSRAWI,
20121 IX86_BUILTIN_PSRADI,
20122 IX86_BUILTIN_PSRLWI,
20123 IX86_BUILTIN_PSRLDI,
20124 IX86_BUILTIN_PSRLQI,
20126 IX86_BUILTIN_PUNPCKHBW,
20127 IX86_BUILTIN_PUNPCKHWD,
20128 IX86_BUILTIN_PUNPCKHDQ,
20129 IX86_BUILTIN_PUNPCKLBW,
20130 IX86_BUILTIN_PUNPCKLWD,
20131 IX86_BUILTIN_PUNPCKLDQ,
20133 IX86_BUILTIN_SHUFPS,
20135 IX86_BUILTIN_RCPPS,
20136 IX86_BUILTIN_RCPSS,
20137 IX86_BUILTIN_RSQRTPS,
20138 IX86_BUILTIN_RSQRTPS_NR,
20139 IX86_BUILTIN_RSQRTSS,
20140 IX86_BUILTIN_RSQRTF,
20141 IX86_BUILTIN_SQRTPS,
20142 IX86_BUILTIN_SQRTPS_NR,
20143 IX86_BUILTIN_SQRTSS,
20145 IX86_BUILTIN_UNPCKHPS,
20146 IX86_BUILTIN_UNPCKLPS,
20148 IX86_BUILTIN_ANDPS,
20149 IX86_BUILTIN_ANDNPS,
20151 IX86_BUILTIN_XORPS,
20154 IX86_BUILTIN_LDMXCSR,
20155 IX86_BUILTIN_STMXCSR,
20156 IX86_BUILTIN_SFENCE,
20158 /* 3DNow! Original */
20159 IX86_BUILTIN_FEMMS,
20160 IX86_BUILTIN_PAVGUSB,
20161 IX86_BUILTIN_PF2ID,
20162 IX86_BUILTIN_PFACC,
20163 IX86_BUILTIN_PFADD,
20164 IX86_BUILTIN_PFCMPEQ,
20165 IX86_BUILTIN_PFCMPGE,
20166 IX86_BUILTIN_PFCMPGT,
20167 IX86_BUILTIN_PFMAX,
20168 IX86_BUILTIN_PFMIN,
20169 IX86_BUILTIN_PFMUL,
20170 IX86_BUILTIN_PFRCP,
20171 IX86_BUILTIN_PFRCPIT1,
20172 IX86_BUILTIN_PFRCPIT2,
20173 IX86_BUILTIN_PFRSQIT1,
20174 IX86_BUILTIN_PFRSQRT,
20175 IX86_BUILTIN_PFSUB,
20176 IX86_BUILTIN_PFSUBR,
20177 IX86_BUILTIN_PI2FD,
20178 IX86_BUILTIN_PMULHRW,
20180 /* 3DNow! Athlon Extensions */
20181 IX86_BUILTIN_PF2IW,
20182 IX86_BUILTIN_PFNACC,
20183 IX86_BUILTIN_PFPNACC,
20184 IX86_BUILTIN_PI2FW,
20185 IX86_BUILTIN_PSWAPDSI,
20186 IX86_BUILTIN_PSWAPDSF,
20189 IX86_BUILTIN_ADDPD,
20190 IX86_BUILTIN_ADDSD,
20191 IX86_BUILTIN_DIVPD,
20192 IX86_BUILTIN_DIVSD,
20193 IX86_BUILTIN_MULPD,
20194 IX86_BUILTIN_MULSD,
20195 IX86_BUILTIN_SUBPD,
20196 IX86_BUILTIN_SUBSD,
20198 IX86_BUILTIN_CMPEQPD,
20199 IX86_BUILTIN_CMPLTPD,
20200 IX86_BUILTIN_CMPLEPD,
20201 IX86_BUILTIN_CMPGTPD,
20202 IX86_BUILTIN_CMPGEPD,
20203 IX86_BUILTIN_CMPNEQPD,
20204 IX86_BUILTIN_CMPNLTPD,
20205 IX86_BUILTIN_CMPNLEPD,
20206 IX86_BUILTIN_CMPNGTPD,
20207 IX86_BUILTIN_CMPNGEPD,
20208 IX86_BUILTIN_CMPORDPD,
20209 IX86_BUILTIN_CMPUNORDPD,
20210 IX86_BUILTIN_CMPEQSD,
20211 IX86_BUILTIN_CMPLTSD,
20212 IX86_BUILTIN_CMPLESD,
20213 IX86_BUILTIN_CMPNEQSD,
20214 IX86_BUILTIN_CMPNLTSD,
20215 IX86_BUILTIN_CMPNLESD,
20216 IX86_BUILTIN_CMPORDSD,
20217 IX86_BUILTIN_CMPUNORDSD,
20219 IX86_BUILTIN_COMIEQSD,
20220 IX86_BUILTIN_COMILTSD,
20221 IX86_BUILTIN_COMILESD,
20222 IX86_BUILTIN_COMIGTSD,
20223 IX86_BUILTIN_COMIGESD,
20224 IX86_BUILTIN_COMINEQSD,
20225 IX86_BUILTIN_UCOMIEQSD,
20226 IX86_BUILTIN_UCOMILTSD,
20227 IX86_BUILTIN_UCOMILESD,
20228 IX86_BUILTIN_UCOMIGTSD,
20229 IX86_BUILTIN_UCOMIGESD,
20230 IX86_BUILTIN_UCOMINEQSD,
20232 IX86_BUILTIN_MAXPD,
20233 IX86_BUILTIN_MAXSD,
20234 IX86_BUILTIN_MINPD,
20235 IX86_BUILTIN_MINSD,
20237 IX86_BUILTIN_ANDPD,
20238 IX86_BUILTIN_ANDNPD,
20240 IX86_BUILTIN_XORPD,
20242 IX86_BUILTIN_SQRTPD,
20243 IX86_BUILTIN_SQRTSD,
20245 IX86_BUILTIN_UNPCKHPD,
20246 IX86_BUILTIN_UNPCKLPD,
20248 IX86_BUILTIN_SHUFPD,
20250 IX86_BUILTIN_LOADUPD,
20251 IX86_BUILTIN_STOREUPD,
20252 IX86_BUILTIN_MOVSD,
20254 IX86_BUILTIN_LOADHPD,
20255 IX86_BUILTIN_LOADLPD,
20257 IX86_BUILTIN_CVTDQ2PD,
20258 IX86_BUILTIN_CVTDQ2PS,
20260 IX86_BUILTIN_CVTPD2DQ,
20261 IX86_BUILTIN_CVTPD2PI,
20262 IX86_BUILTIN_CVTPD2PS,
20263 IX86_BUILTIN_CVTTPD2DQ,
20264 IX86_BUILTIN_CVTTPD2PI,
20266 IX86_BUILTIN_CVTPI2PD,
20267 IX86_BUILTIN_CVTSI2SD,
20268 IX86_BUILTIN_CVTSI642SD,
20270 IX86_BUILTIN_CVTSD2SI,
20271 IX86_BUILTIN_CVTSD2SI64,
20272 IX86_BUILTIN_CVTSD2SS,
20273 IX86_BUILTIN_CVTSS2SD,
20274 IX86_BUILTIN_CVTTSD2SI,
20275 IX86_BUILTIN_CVTTSD2SI64,
20277 IX86_BUILTIN_CVTPS2DQ,
20278 IX86_BUILTIN_CVTPS2PD,
20279 IX86_BUILTIN_CVTTPS2DQ,
20281 IX86_BUILTIN_MOVNTI,
20282 IX86_BUILTIN_MOVNTPD,
20283 IX86_BUILTIN_MOVNTDQ,
20285 IX86_BUILTIN_MOVQ128,
20288 IX86_BUILTIN_MASKMOVDQU,
20289 IX86_BUILTIN_MOVMSKPD,
20290 IX86_BUILTIN_PMOVMSKB128,
20292 IX86_BUILTIN_PACKSSWB128,
20293 IX86_BUILTIN_PACKSSDW128,
20294 IX86_BUILTIN_PACKUSWB128,
20296 IX86_BUILTIN_PADDB128,
20297 IX86_BUILTIN_PADDW128,
20298 IX86_BUILTIN_PADDD128,
20299 IX86_BUILTIN_PADDQ128,
20300 IX86_BUILTIN_PADDSB128,
20301 IX86_BUILTIN_PADDSW128,
20302 IX86_BUILTIN_PADDUSB128,
20303 IX86_BUILTIN_PADDUSW128,
20304 IX86_BUILTIN_PSUBB128,
20305 IX86_BUILTIN_PSUBW128,
20306 IX86_BUILTIN_PSUBD128,
20307 IX86_BUILTIN_PSUBQ128,
20308 IX86_BUILTIN_PSUBSB128,
20309 IX86_BUILTIN_PSUBSW128,
20310 IX86_BUILTIN_PSUBUSB128,
20311 IX86_BUILTIN_PSUBUSW128,
20313 IX86_BUILTIN_PAND128,
20314 IX86_BUILTIN_PANDN128,
20315 IX86_BUILTIN_POR128,
20316 IX86_BUILTIN_PXOR128,
20318 IX86_BUILTIN_PAVGB128,
20319 IX86_BUILTIN_PAVGW128,
20321 IX86_BUILTIN_PCMPEQB128,
20322 IX86_BUILTIN_PCMPEQW128,
20323 IX86_BUILTIN_PCMPEQD128,
20324 IX86_BUILTIN_PCMPGTB128,
20325 IX86_BUILTIN_PCMPGTW128,
20326 IX86_BUILTIN_PCMPGTD128,
20328 IX86_BUILTIN_PMADDWD128,
20330 IX86_BUILTIN_PMAXSW128,
20331 IX86_BUILTIN_PMAXUB128,
20332 IX86_BUILTIN_PMINSW128,
20333 IX86_BUILTIN_PMINUB128,
20335 IX86_BUILTIN_PMULUDQ,
20336 IX86_BUILTIN_PMULUDQ128,
20337 IX86_BUILTIN_PMULHUW128,
20338 IX86_BUILTIN_PMULHW128,
20339 IX86_BUILTIN_PMULLW128,
20341 IX86_BUILTIN_PSADBW128,
20342 IX86_BUILTIN_PSHUFHW,
20343 IX86_BUILTIN_PSHUFLW,
20344 IX86_BUILTIN_PSHUFD,
20346 IX86_BUILTIN_PSLLDQI128,
20347 IX86_BUILTIN_PSLLWI128,
20348 IX86_BUILTIN_PSLLDI128,
20349 IX86_BUILTIN_PSLLQI128,
20350 IX86_BUILTIN_PSRAWI128,
20351 IX86_BUILTIN_PSRADI128,
20352 IX86_BUILTIN_PSRLDQI128,
20353 IX86_BUILTIN_PSRLWI128,
20354 IX86_BUILTIN_PSRLDI128,
20355 IX86_BUILTIN_PSRLQI128,
20357 IX86_BUILTIN_PSLLDQ128,
20358 IX86_BUILTIN_PSLLW128,
20359 IX86_BUILTIN_PSLLD128,
20360 IX86_BUILTIN_PSLLQ128,
20361 IX86_BUILTIN_PSRAW128,
20362 IX86_BUILTIN_PSRAD128,
20363 IX86_BUILTIN_PSRLW128,
20364 IX86_BUILTIN_PSRLD128,
20365 IX86_BUILTIN_PSRLQ128,
20367 IX86_BUILTIN_PUNPCKHBW128,
20368 IX86_BUILTIN_PUNPCKHWD128,
20369 IX86_BUILTIN_PUNPCKHDQ128,
20370 IX86_BUILTIN_PUNPCKHQDQ128,
20371 IX86_BUILTIN_PUNPCKLBW128,
20372 IX86_BUILTIN_PUNPCKLWD128,
20373 IX86_BUILTIN_PUNPCKLDQ128,
20374 IX86_BUILTIN_PUNPCKLQDQ128,
20376 IX86_BUILTIN_CLFLUSH,
20377 IX86_BUILTIN_MFENCE,
20378 IX86_BUILTIN_LFENCE,
20381 IX86_BUILTIN_ADDSUBPS,
20382 IX86_BUILTIN_HADDPS,
20383 IX86_BUILTIN_HSUBPS,
20384 IX86_BUILTIN_MOVSHDUP,
20385 IX86_BUILTIN_MOVSLDUP,
20386 IX86_BUILTIN_ADDSUBPD,
20387 IX86_BUILTIN_HADDPD,
20388 IX86_BUILTIN_HSUBPD,
20389 IX86_BUILTIN_LDDQU,
20391 IX86_BUILTIN_MONITOR,
20392 IX86_BUILTIN_MWAIT,
20395 IX86_BUILTIN_PHADDW,
20396 IX86_BUILTIN_PHADDD,
20397 IX86_BUILTIN_PHADDSW,
20398 IX86_BUILTIN_PHSUBW,
20399 IX86_BUILTIN_PHSUBD,
20400 IX86_BUILTIN_PHSUBSW,
20401 IX86_BUILTIN_PMADDUBSW,
20402 IX86_BUILTIN_PMULHRSW,
20403 IX86_BUILTIN_PSHUFB,
20404 IX86_BUILTIN_PSIGNB,
20405 IX86_BUILTIN_PSIGNW,
20406 IX86_BUILTIN_PSIGND,
20407 IX86_BUILTIN_PALIGNR,
20408 IX86_BUILTIN_PABSB,
20409 IX86_BUILTIN_PABSW,
20410 IX86_BUILTIN_PABSD,
20412 IX86_BUILTIN_PHADDW128,
20413 IX86_BUILTIN_PHADDD128,
20414 IX86_BUILTIN_PHADDSW128,
20415 IX86_BUILTIN_PHSUBW128,
20416 IX86_BUILTIN_PHSUBD128,
20417 IX86_BUILTIN_PHSUBSW128,
20418 IX86_BUILTIN_PMADDUBSW128,
20419 IX86_BUILTIN_PMULHRSW128,
20420 IX86_BUILTIN_PSHUFB128,
20421 IX86_BUILTIN_PSIGNB128,
20422 IX86_BUILTIN_PSIGNW128,
20423 IX86_BUILTIN_PSIGND128,
20424 IX86_BUILTIN_PALIGNR128,
20425 IX86_BUILTIN_PABSB128,
20426 IX86_BUILTIN_PABSW128,
20427 IX86_BUILTIN_PABSD128,
20429 /* AMDFAM10 - SSE4A New Instructions. */
20430 IX86_BUILTIN_MOVNTSD,
20431 IX86_BUILTIN_MOVNTSS,
20432 IX86_BUILTIN_EXTRQI,
20433 IX86_BUILTIN_EXTRQ,
20434 IX86_BUILTIN_INSERTQI,
20435 IX86_BUILTIN_INSERTQ,
20438 IX86_BUILTIN_BLENDPD,
20439 IX86_BUILTIN_BLENDPS,
20440 IX86_BUILTIN_BLENDVPD,
20441 IX86_BUILTIN_BLENDVPS,
20442 IX86_BUILTIN_PBLENDVB128,
20443 IX86_BUILTIN_PBLENDW128,
20448 IX86_BUILTIN_INSERTPS128,
20450 IX86_BUILTIN_MOVNTDQA,
20451 IX86_BUILTIN_MPSADBW128,
20452 IX86_BUILTIN_PACKUSDW128,
20453 IX86_BUILTIN_PCMPEQQ,
20454 IX86_BUILTIN_PHMINPOSUW128,
20456 IX86_BUILTIN_PMAXSB128,
20457 IX86_BUILTIN_PMAXSD128,
20458 IX86_BUILTIN_PMAXUD128,
20459 IX86_BUILTIN_PMAXUW128,
20461 IX86_BUILTIN_PMINSB128,
20462 IX86_BUILTIN_PMINSD128,
20463 IX86_BUILTIN_PMINUD128,
20464 IX86_BUILTIN_PMINUW128,
20466 IX86_BUILTIN_PMOVSXBW128,
20467 IX86_BUILTIN_PMOVSXBD128,
20468 IX86_BUILTIN_PMOVSXBQ128,
20469 IX86_BUILTIN_PMOVSXWD128,
20470 IX86_BUILTIN_PMOVSXWQ128,
20471 IX86_BUILTIN_PMOVSXDQ128,
20473 IX86_BUILTIN_PMOVZXBW128,
20474 IX86_BUILTIN_PMOVZXBD128,
20475 IX86_BUILTIN_PMOVZXBQ128,
20476 IX86_BUILTIN_PMOVZXWD128,
20477 IX86_BUILTIN_PMOVZXWQ128,
20478 IX86_BUILTIN_PMOVZXDQ128,
20480 IX86_BUILTIN_PMULDQ128,
20481 IX86_BUILTIN_PMULLD128,
20483 IX86_BUILTIN_ROUNDPD,
20484 IX86_BUILTIN_ROUNDPS,
20485 IX86_BUILTIN_ROUNDSD,
20486 IX86_BUILTIN_ROUNDSS,
20488 IX86_BUILTIN_PTESTZ,
20489 IX86_BUILTIN_PTESTC,
20490 IX86_BUILTIN_PTESTNZC,
20492 IX86_BUILTIN_VEC_INIT_V2SI,
20493 IX86_BUILTIN_VEC_INIT_V4HI,
20494 IX86_BUILTIN_VEC_INIT_V8QI,
20495 IX86_BUILTIN_VEC_EXT_V2DF,
20496 IX86_BUILTIN_VEC_EXT_V2DI,
20497 IX86_BUILTIN_VEC_EXT_V4SF,
20498 IX86_BUILTIN_VEC_EXT_V4SI,
20499 IX86_BUILTIN_VEC_EXT_V8HI,
20500 IX86_BUILTIN_VEC_EXT_V2SI,
20501 IX86_BUILTIN_VEC_EXT_V4HI,
20502 IX86_BUILTIN_VEC_EXT_V16QI,
20503 IX86_BUILTIN_VEC_SET_V2DI,
20504 IX86_BUILTIN_VEC_SET_V4SF,
20505 IX86_BUILTIN_VEC_SET_V4SI,
20506 IX86_BUILTIN_VEC_SET_V8HI,
20507 IX86_BUILTIN_VEC_SET_V4HI,
20508 IX86_BUILTIN_VEC_SET_V16QI,
20510 IX86_BUILTIN_VEC_PACK_SFIX,
20513 IX86_BUILTIN_CRC32QI,
20514 IX86_BUILTIN_CRC32HI,
20515 IX86_BUILTIN_CRC32SI,
20516 IX86_BUILTIN_CRC32DI,
20518 IX86_BUILTIN_PCMPESTRI128,
20519 IX86_BUILTIN_PCMPESTRM128,
20520 IX86_BUILTIN_PCMPESTRA128,
20521 IX86_BUILTIN_PCMPESTRC128,
20522 IX86_BUILTIN_PCMPESTRO128,
20523 IX86_BUILTIN_PCMPESTRS128,
20524 IX86_BUILTIN_PCMPESTRZ128,
20525 IX86_BUILTIN_PCMPISTRI128,
20526 IX86_BUILTIN_PCMPISTRM128,
20527 IX86_BUILTIN_PCMPISTRA128,
20528 IX86_BUILTIN_PCMPISTRC128,
20529 IX86_BUILTIN_PCMPISTRO128,
20530 IX86_BUILTIN_PCMPISTRS128,
20531 IX86_BUILTIN_PCMPISTRZ128,
20533 IX86_BUILTIN_PCMPGTQ,
20535 /* AES instructions */
20536 IX86_BUILTIN_AESENC128,
20537 IX86_BUILTIN_AESENCLAST128,
20538 IX86_BUILTIN_AESDEC128,
20539 IX86_BUILTIN_AESDECLAST128,
20540 IX86_BUILTIN_AESIMC128,
20541 IX86_BUILTIN_AESKEYGENASSIST128,
20543 /* PCLMUL instruction */
20544 IX86_BUILTIN_PCLMULQDQ128,
20547 IX86_BUILTIN_ADDPD256,
20548 IX86_BUILTIN_ADDPS256,
20549 IX86_BUILTIN_ADDSUBPD256,
20550 IX86_BUILTIN_ADDSUBPS256,
20551 IX86_BUILTIN_ANDPD256,
20552 IX86_BUILTIN_ANDPS256,
20553 IX86_BUILTIN_ANDNPD256,
20554 IX86_BUILTIN_ANDNPS256,
20555 IX86_BUILTIN_BLENDPD256,
20556 IX86_BUILTIN_BLENDPS256,
20557 IX86_BUILTIN_BLENDVPD256,
20558 IX86_BUILTIN_BLENDVPS256,
20559 IX86_BUILTIN_DIVPD256,
20560 IX86_BUILTIN_DIVPS256,
20561 IX86_BUILTIN_DPPS256,
20562 IX86_BUILTIN_HADDPD256,
20563 IX86_BUILTIN_HADDPS256,
20564 IX86_BUILTIN_HSUBPD256,
20565 IX86_BUILTIN_HSUBPS256,
20566 IX86_BUILTIN_MAXPD256,
20567 IX86_BUILTIN_MAXPS256,
20568 IX86_BUILTIN_MINPD256,
20569 IX86_BUILTIN_MINPS256,
20570 IX86_BUILTIN_MULPD256,
20571 IX86_BUILTIN_MULPS256,
20572 IX86_BUILTIN_ORPD256,
20573 IX86_BUILTIN_ORPS256,
20574 IX86_BUILTIN_SHUFPD256,
20575 IX86_BUILTIN_SHUFPS256,
20576 IX86_BUILTIN_SUBPD256,
20577 IX86_BUILTIN_SUBPS256,
20578 IX86_BUILTIN_XORPD256,
20579 IX86_BUILTIN_XORPS256,
20580 IX86_BUILTIN_CMPSD,
20581 IX86_BUILTIN_CMPSS,
20582 IX86_BUILTIN_CMPPD,
20583 IX86_BUILTIN_CMPPS,
20584 IX86_BUILTIN_CMPPD256,
20585 IX86_BUILTIN_CMPPS256,
20586 IX86_BUILTIN_CVTDQ2PD256,
20587 IX86_BUILTIN_CVTDQ2PS256,
20588 IX86_BUILTIN_CVTPD2PS256,
20589 IX86_BUILTIN_CVTPS2DQ256,
20590 IX86_BUILTIN_CVTPS2PD256,
20591 IX86_BUILTIN_CVTTPD2DQ256,
20592 IX86_BUILTIN_CVTPD2DQ256,
20593 IX86_BUILTIN_CVTTPS2DQ256,
20594 IX86_BUILTIN_EXTRACTF128PD256,
20595 IX86_BUILTIN_EXTRACTF128PS256,
20596 IX86_BUILTIN_EXTRACTF128SI256,
20597 IX86_BUILTIN_VZEROALL,
20598 IX86_BUILTIN_VZEROUPPER,
20599 IX86_BUILTIN_VZEROUPPER_REX64,
20600 IX86_BUILTIN_VPERMILVARPD,
20601 IX86_BUILTIN_VPERMILVARPS,
20602 IX86_BUILTIN_VPERMILVARPD256,
20603 IX86_BUILTIN_VPERMILVARPS256,
20604 IX86_BUILTIN_VPERMILPD,
20605 IX86_BUILTIN_VPERMILPS,
20606 IX86_BUILTIN_VPERMILPD256,
20607 IX86_BUILTIN_VPERMILPS256,
20608 IX86_BUILTIN_VPERM2F128PD256,
20609 IX86_BUILTIN_VPERM2F128PS256,
20610 IX86_BUILTIN_VPERM2F128SI256,
20611 IX86_BUILTIN_VBROADCASTSS,
20612 IX86_BUILTIN_VBROADCASTSD256,
20613 IX86_BUILTIN_VBROADCASTSS256,
20614 IX86_BUILTIN_VBROADCASTPD256,
20615 IX86_BUILTIN_VBROADCASTPS256,
20616 IX86_BUILTIN_VINSERTF128PD256,
20617 IX86_BUILTIN_VINSERTF128PS256,
20618 IX86_BUILTIN_VINSERTF128SI256,
20619 IX86_BUILTIN_LOADUPD256,
20620 IX86_BUILTIN_LOADUPS256,
20621 IX86_BUILTIN_STOREUPD256,
20622 IX86_BUILTIN_STOREUPS256,
20623 IX86_BUILTIN_LDDQU256,
20624 IX86_BUILTIN_MOVNTDQ256,
20625 IX86_BUILTIN_MOVNTPD256,
20626 IX86_BUILTIN_MOVNTPS256,
20627 IX86_BUILTIN_LOADDQU256,
20628 IX86_BUILTIN_STOREDQU256,
20629 IX86_BUILTIN_MASKLOADPD,
20630 IX86_BUILTIN_MASKLOADPS,
20631 IX86_BUILTIN_MASKSTOREPD,
20632 IX86_BUILTIN_MASKSTOREPS,
20633 IX86_BUILTIN_MASKLOADPD256,
20634 IX86_BUILTIN_MASKLOADPS256,
20635 IX86_BUILTIN_MASKSTOREPD256,
20636 IX86_BUILTIN_MASKSTOREPS256,
20637 IX86_BUILTIN_MOVSHDUP256,
20638 IX86_BUILTIN_MOVSLDUP256,
20639 IX86_BUILTIN_MOVDDUP256,
20641 IX86_BUILTIN_SQRTPD256,
20642 IX86_BUILTIN_SQRTPS256,
20643 IX86_BUILTIN_SQRTPS_NR256,
20644 IX86_BUILTIN_RSQRTPS256,
20645 IX86_BUILTIN_RSQRTPS_NR256,
20647 IX86_BUILTIN_RCPPS256,
20649 IX86_BUILTIN_ROUNDPD256,
20650 IX86_BUILTIN_ROUNDPS256,
20652 IX86_BUILTIN_UNPCKHPD256,
20653 IX86_BUILTIN_UNPCKLPD256,
20654 IX86_BUILTIN_UNPCKHPS256,
20655 IX86_BUILTIN_UNPCKLPS256,
20657 IX86_BUILTIN_SI256_SI,
20658 IX86_BUILTIN_PS256_PS,
20659 IX86_BUILTIN_PD256_PD,
20660 IX86_BUILTIN_SI_SI256,
20661 IX86_BUILTIN_PS_PS256,
20662 IX86_BUILTIN_PD_PD256,
20664 IX86_BUILTIN_VTESTZPD,
20665 IX86_BUILTIN_VTESTCPD,
20666 IX86_BUILTIN_VTESTNZCPD,
20667 IX86_BUILTIN_VTESTZPS,
20668 IX86_BUILTIN_VTESTCPS,
20669 IX86_BUILTIN_VTESTNZCPS,
20670 IX86_BUILTIN_VTESTZPD256,
20671 IX86_BUILTIN_VTESTCPD256,
20672 IX86_BUILTIN_VTESTNZCPD256,
20673 IX86_BUILTIN_VTESTZPS256,
20674 IX86_BUILTIN_VTESTCPS256,
20675 IX86_BUILTIN_VTESTNZCPS256,
20676 IX86_BUILTIN_PTESTZ256,
20677 IX86_BUILTIN_PTESTC256,
20678 IX86_BUILTIN_PTESTNZC256,
20680 IX86_BUILTIN_MOVMSKPD256,
20681 IX86_BUILTIN_MOVMSKPS256,
20683 /* TFmode support builtins. */
20685 IX86_BUILTIN_HUGE_VALQ,
20686 IX86_BUILTIN_FABSQ,
20687 IX86_BUILTIN_COPYSIGNQ,
20689 /* SSE5 instructions */
20690 IX86_BUILTIN_FMADDSS,
20691 IX86_BUILTIN_FMADDSD,
20692 IX86_BUILTIN_FMADDPS,
20693 IX86_BUILTIN_FMADDPD,
20694 IX86_BUILTIN_FMSUBSS,
20695 IX86_BUILTIN_FMSUBSD,
20696 IX86_BUILTIN_FMSUBPS,
20697 IX86_BUILTIN_FMSUBPD,
20698 IX86_BUILTIN_FNMADDSS,
20699 IX86_BUILTIN_FNMADDSD,
20700 IX86_BUILTIN_FNMADDPS,
20701 IX86_BUILTIN_FNMADDPD,
20702 IX86_BUILTIN_FNMSUBSS,
20703 IX86_BUILTIN_FNMSUBSD,
20704 IX86_BUILTIN_FNMSUBPS,
20705 IX86_BUILTIN_FNMSUBPD,
20706 IX86_BUILTIN_PCMOV,
20707 IX86_BUILTIN_PCMOV_V2DI,
20708 IX86_BUILTIN_PCMOV_V4SI,
20709 IX86_BUILTIN_PCMOV_V8HI,
20710 IX86_BUILTIN_PCMOV_V16QI,
20711 IX86_BUILTIN_PCMOV_V4SF,
20712 IX86_BUILTIN_PCMOV_V2DF,
20713 IX86_BUILTIN_PPERM,
20714 IX86_BUILTIN_PERMPS,
20715 IX86_BUILTIN_PERMPD,
20716 IX86_BUILTIN_PMACSSWW,
20717 IX86_BUILTIN_PMACSWW,
20718 IX86_BUILTIN_PMACSSWD,
20719 IX86_BUILTIN_PMACSWD,
20720 IX86_BUILTIN_PMACSSDD,
20721 IX86_BUILTIN_PMACSDD,
20722 IX86_BUILTIN_PMACSSDQL,
20723 IX86_BUILTIN_PMACSSDQH,
20724 IX86_BUILTIN_PMACSDQL,
20725 IX86_BUILTIN_PMACSDQH,
20726 IX86_BUILTIN_PMADCSSWD,
20727 IX86_BUILTIN_PMADCSWD,
20728 IX86_BUILTIN_PHADDBW,
20729 IX86_BUILTIN_PHADDBD,
20730 IX86_BUILTIN_PHADDBQ,
20731 IX86_BUILTIN_PHADDWD,
20732 IX86_BUILTIN_PHADDWQ,
20733 IX86_BUILTIN_PHADDDQ,
20734 IX86_BUILTIN_PHADDUBW,
20735 IX86_BUILTIN_PHADDUBD,
20736 IX86_BUILTIN_PHADDUBQ,
20737 IX86_BUILTIN_PHADDUWD,
20738 IX86_BUILTIN_PHADDUWQ,
20739 IX86_BUILTIN_PHADDUDQ,
20740 IX86_BUILTIN_PHSUBBW,
20741 IX86_BUILTIN_PHSUBWD,
20742 IX86_BUILTIN_PHSUBDQ,
20743 IX86_BUILTIN_PROTB,
20744 IX86_BUILTIN_PROTW,
20745 IX86_BUILTIN_PROTD,
20746 IX86_BUILTIN_PROTQ,
20747 IX86_BUILTIN_PROTB_IMM,
20748 IX86_BUILTIN_PROTW_IMM,
20749 IX86_BUILTIN_PROTD_IMM,
20750 IX86_BUILTIN_PROTQ_IMM,
20751 IX86_BUILTIN_PSHLB,
20752 IX86_BUILTIN_PSHLW,
20753 IX86_BUILTIN_PSHLD,
20754 IX86_BUILTIN_PSHLQ,
20755 IX86_BUILTIN_PSHAB,
20756 IX86_BUILTIN_PSHAW,
20757 IX86_BUILTIN_PSHAD,
20758 IX86_BUILTIN_PSHAQ,
20759 IX86_BUILTIN_FRCZSS,
20760 IX86_BUILTIN_FRCZSD,
20761 IX86_BUILTIN_FRCZPS,
20762 IX86_BUILTIN_FRCZPD,
20763 IX86_BUILTIN_CVTPH2PS,
20764 IX86_BUILTIN_CVTPS2PH,
20766 IX86_BUILTIN_COMEQSS,
20767 IX86_BUILTIN_COMNESS,
20768 IX86_BUILTIN_COMLTSS,
20769 IX86_BUILTIN_COMLESS,
20770 IX86_BUILTIN_COMGTSS,
20771 IX86_BUILTIN_COMGESS,
20772 IX86_BUILTIN_COMUEQSS,
20773 IX86_BUILTIN_COMUNESS,
20774 IX86_BUILTIN_COMULTSS,
20775 IX86_BUILTIN_COMULESS,
20776 IX86_BUILTIN_COMUGTSS,
20777 IX86_BUILTIN_COMUGESS,
20778 IX86_BUILTIN_COMORDSS,
20779 IX86_BUILTIN_COMUNORDSS,
20780 IX86_BUILTIN_COMFALSESS,
20781 IX86_BUILTIN_COMTRUESS,
20783 IX86_BUILTIN_COMEQSD,
20784 IX86_BUILTIN_COMNESD,
20785 IX86_BUILTIN_COMLTSD,
20786 IX86_BUILTIN_COMLESD,
20787 IX86_BUILTIN_COMGTSD,
20788 IX86_BUILTIN_COMGESD,
20789 IX86_BUILTIN_COMUEQSD,
20790 IX86_BUILTIN_COMUNESD,
20791 IX86_BUILTIN_COMULTSD,
20792 IX86_BUILTIN_COMULESD,
20793 IX86_BUILTIN_COMUGTSD,
20794 IX86_BUILTIN_COMUGESD,
20795 IX86_BUILTIN_COMORDSD,
20796 IX86_BUILTIN_COMUNORDSD,
20797 IX86_BUILTIN_COMFALSESD,
20798 IX86_BUILTIN_COMTRUESD,
20800 IX86_BUILTIN_COMEQPS,
20801 IX86_BUILTIN_COMNEPS,
20802 IX86_BUILTIN_COMLTPS,
20803 IX86_BUILTIN_COMLEPS,
20804 IX86_BUILTIN_COMGTPS,
20805 IX86_BUILTIN_COMGEPS,
20806 IX86_BUILTIN_COMUEQPS,
20807 IX86_BUILTIN_COMUNEPS,
20808 IX86_BUILTIN_COMULTPS,
20809 IX86_BUILTIN_COMULEPS,
20810 IX86_BUILTIN_COMUGTPS,
20811 IX86_BUILTIN_COMUGEPS,
20812 IX86_BUILTIN_COMORDPS,
20813 IX86_BUILTIN_COMUNORDPS,
20814 IX86_BUILTIN_COMFALSEPS,
20815 IX86_BUILTIN_COMTRUEPS,
20817 IX86_BUILTIN_COMEQPD,
20818 IX86_BUILTIN_COMNEPD,
20819 IX86_BUILTIN_COMLTPD,
20820 IX86_BUILTIN_COMLEPD,
20821 IX86_BUILTIN_COMGTPD,
20822 IX86_BUILTIN_COMGEPD,
20823 IX86_BUILTIN_COMUEQPD,
20824 IX86_BUILTIN_COMUNEPD,
20825 IX86_BUILTIN_COMULTPD,
20826 IX86_BUILTIN_COMULEPD,
20827 IX86_BUILTIN_COMUGTPD,
20828 IX86_BUILTIN_COMUGEPD,
20829 IX86_BUILTIN_COMORDPD,
20830 IX86_BUILTIN_COMUNORDPD,
20831 IX86_BUILTIN_COMFALSEPD,
20832 IX86_BUILTIN_COMTRUEPD,
20834 IX86_BUILTIN_PCOMEQUB,
20835 IX86_BUILTIN_PCOMNEUB,
20836 IX86_BUILTIN_PCOMLTUB,
20837 IX86_BUILTIN_PCOMLEUB,
20838 IX86_BUILTIN_PCOMGTUB,
20839 IX86_BUILTIN_PCOMGEUB,
20840 IX86_BUILTIN_PCOMFALSEUB,
20841 IX86_BUILTIN_PCOMTRUEUB,
20842 IX86_BUILTIN_PCOMEQUW,
20843 IX86_BUILTIN_PCOMNEUW,
20844 IX86_BUILTIN_PCOMLTUW,
20845 IX86_BUILTIN_PCOMLEUW,
20846 IX86_BUILTIN_PCOMGTUW,
20847 IX86_BUILTIN_PCOMGEUW,
20848 IX86_BUILTIN_PCOMFALSEUW,
20849 IX86_BUILTIN_PCOMTRUEUW,
20850 IX86_BUILTIN_PCOMEQUD,
20851 IX86_BUILTIN_PCOMNEUD,
20852 IX86_BUILTIN_PCOMLTUD,
20853 IX86_BUILTIN_PCOMLEUD,
20854 IX86_BUILTIN_PCOMGTUD,
20855 IX86_BUILTIN_PCOMGEUD,
20856 IX86_BUILTIN_PCOMFALSEUD,
20857 IX86_BUILTIN_PCOMTRUEUD,
20858 IX86_BUILTIN_PCOMEQUQ,
20859 IX86_BUILTIN_PCOMNEUQ,
20860 IX86_BUILTIN_PCOMLTUQ,
20861 IX86_BUILTIN_PCOMLEUQ,
20862 IX86_BUILTIN_PCOMGTUQ,
20863 IX86_BUILTIN_PCOMGEUQ,
20864 IX86_BUILTIN_PCOMFALSEUQ,
20865 IX86_BUILTIN_PCOMTRUEUQ,
20867 IX86_BUILTIN_PCOMEQB,
20868 IX86_BUILTIN_PCOMNEB,
20869 IX86_BUILTIN_PCOMLTB,
20870 IX86_BUILTIN_PCOMLEB,
20871 IX86_BUILTIN_PCOMGTB,
20872 IX86_BUILTIN_PCOMGEB,
20873 IX86_BUILTIN_PCOMFALSEB,
20874 IX86_BUILTIN_PCOMTRUEB,
20875 IX86_BUILTIN_PCOMEQW,
20876 IX86_BUILTIN_PCOMNEW,
20877 IX86_BUILTIN_PCOMLTW,
20878 IX86_BUILTIN_PCOMLEW,
20879 IX86_BUILTIN_PCOMGTW,
20880 IX86_BUILTIN_PCOMGEW,
20881 IX86_BUILTIN_PCOMFALSEW,
20882 IX86_BUILTIN_PCOMTRUEW,
20883 IX86_BUILTIN_PCOMEQD,
20884 IX86_BUILTIN_PCOMNED,
20885 IX86_BUILTIN_PCOMLTD,
20886 IX86_BUILTIN_PCOMLED,
20887 IX86_BUILTIN_PCOMGTD,
20888 IX86_BUILTIN_PCOMGED,
20889 IX86_BUILTIN_PCOMFALSED,
20890 IX86_BUILTIN_PCOMTRUED,
20891 IX86_BUILTIN_PCOMEQQ,
20892 IX86_BUILTIN_PCOMNEQ,
20893 IX86_BUILTIN_PCOMLTQ,
20894 IX86_BUILTIN_PCOMLEQ,
20895 IX86_BUILTIN_PCOMGTQ,
20896 IX86_BUILTIN_PCOMGEQ,
20897 IX86_BUILTIN_PCOMFALSEQ,
20898 IX86_BUILTIN_PCOMTRUEQ,
20903 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; slots are NULL_TREE until the
   corresponding builtin decl has been created (see def_builtin and
   ix86_add_new_builtins below).  GTY(()) marks it as a GC root.  */
20904 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20906 /* Table of all of the builtin functions that are possible with different ISA's
20907    but are waiting to be built until a function is declared to use that
/* NOTE(review): this extraction is missing lines of the original file
   (the embedded line numbers skip); the struct's closing "};" is among
   the dropped lines.  Code text below is preserved exactly as found.  */
20909 struct GTY(()) builtin_isa {
20910 tree type; /* builtin type to use in the declaration */
20911 const char *name; /* function name */
20912 int isa; /* isa_flags this builtin is defined for */
20913 bool const_p; /* true if the declaration is constant */
/* Deferred-builtin table, parallel to ix86_builtins: one slot per
   enum ix86_builtins code.  A non-NULL .type marks a builtin still
   waiting to be declared (consumed by ix86_add_new_builtins).  */
20916 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20919 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20920 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20921 * function decl in the ix86_builtins array. Returns the function decl or
20922 * NULL_TREE, if the builtin was not added.
20924 * If the front end has a special hook for builtin functions, delay adding
20925 * builtin functions that aren't in the current ISA until the ISA is changed
20926 * with function specific optimization. Doing so, can save about 300K for the
20927 * default compiler. When the builtin is expanded, check at that time whether
20930 * If the front end doesn't have a special hook, record all builtins, even if
20931 * it isn't an instruction set in the current ISA in case the user uses
20932 * function specific options for a different ISA, so that we don't get scope
20933 * errors if a builtin is added in the middle of a function scope. */
/* Record target builtin CODE named NAME with function type TYPE,
   guarded by the isa_flags MASK (see the block comment above).
   NOTE(review): the extraction dropped this function's return-type
   line, braces, "else" and final "return decl;" — the embedded line
   numbers skip at those points.  Code text is preserved exactly.  */
20936 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20938 tree decl = NULL_TREE;
/* Builtins flagged 64-bit-only are registered only on TARGET_64BIT.  */
20940 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20942 ix86_builtins_isa[(int) code].isa = mask;
/* Create the decl eagerly when the ISA is currently enabled, or when
   the front end registers builtins with extended scope anyway.  */
20944 if ((mask & ix86_isa_flags) != 0
20945 || (lang_hooks.builtin_function
20946 == lang_hooks.builtin_function_ext_scope))
20949 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20951 ix86_builtins[(int) code] = decl;
20952 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise defer creation: stash name/type in ix86_builtins_isa so
   ix86_add_new_builtins can build the decl if the ISA is enabled
   later via function-specific options.  */
20956 ix86_builtins[(int) code] = NULL_TREE;
20957 ix86_builtins_isa[(int) code].const_p = false;
20958 ix86_builtins_isa[(int) code].type = type;
20959 ix86_builtins_isa[(int) code].name = name;
20966 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): extraction has dropped lines here as well (the
   embedded numbering skips 20971/20973/20975); presumably a guard such
   as "if (decl)" sat between the call and TREE_READONLY — confirm
   against the original file.  Code text is preserved exactly.  */
20969 def_builtin_const (int mask, const char *name, tree type,
20970 enum ix86_builtins code)
20972 tree decl = def_builtin (mask, name, type, code);
/* Mark the created decl pure-read-only ("const" attribute).  */
20974 TREE_READONLY (decl) = 1;
/* Remember const-ness for decls whose creation was deferred.  */
20976 ix86_builtins_isa[(int) code].const_p = true;
20981 /* Add any new builtin functions for a given ISA that may not have been
20982    declared. This saves a bit of space compared to adding all of the
20983    declarations to the tree, even if we didn't use them. */
/* NOTE(review): the extraction dropped this function's return-type
   line, the declarations of "i" and "decl", and the surrounding
   braces (embedded numbering skips).  Code text preserved exactly.
   Walks the deferred table filled in by def_builtin and creates the
   decls whose required ISA bits are now present in ISA.  */
20986 ix86_add_new_builtins (int isa)
20991 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* Non-NULL .type marks a builtin still awaiting declaration.  */
20993 if ((ix86_builtins_isa[i].isa & isa) != 0
20994 && ix86_builtins_isa[i].type != NULL_TREE)
20996 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20997 ix86_builtins_isa[i].type,
20998 i, BUILT_IN_MD, NULL,
21001 ix86_builtins[i] = decl;
/* Clear .type so the builtin is not declared twice.  */
21002 ix86_builtins_isa[i].type = NULL_TREE;
21003 if (ix86_builtins_isa[i].const_p)
21004 TREE_READONLY (decl) = 1;
21009 /* Bits for builtin_description.flag. */
21011 /* Set when we don't support the comparison natively, and should
21012    swap_comparison in order to support it. */
21013 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the bdesc_* builtin tables below: which ISA enables the
   builtin, which insn pattern implements it, its user-visible name,
   its enum code, and (for comparison builtins) the rtx comparison.
   NOTE(review): the extraction dropped lines here (embedded numbering
   skips 21016/21022-21024) — likely the opening "{", a final "flag"
   field and the closing "};".  Code text preserved exactly.  */
21015 struct builtin_description
21017 const unsigned int mask;
21018 const enum insn_code icode;
21019 const char *const name;
21020 const enum ix86_builtins code;
21021 const enum rtx_code comparison;
21025 static const struct builtin_description bdesc_comi[] =
21027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21048 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21049 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21053 static const struct builtin_description bdesc_pcmpestr[] =
21056 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21058 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21059 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21062 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21065 static const struct builtin_description bdesc_pcmpistr[] =
21068 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21069 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21070 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21071 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21072 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21073 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21074 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21077 /* Special builtin types */
21078 enum ix86_special_builtin_type
21080 SPECIAL_FTYPE_UNKNOWN,
21082 V32QI_FTYPE_PCCHAR,
21083 V16QI_FTYPE_PCCHAR,
21085 V8SF_FTYPE_PCFLOAT,
21087 V4DF_FTYPE_PCDOUBLE,
21088 V4SF_FTYPE_PCFLOAT,
21089 V2DF_FTYPE_PCDOUBLE,
21090 V8SF_FTYPE_PCV8SF_V8SF,
21091 V4DF_FTYPE_PCV4DF_V4DF,
21092 V4SF_FTYPE_V4SF_PCV2SF,
21093 V4SF_FTYPE_PCV4SF_V4SF,
21094 V2DF_FTYPE_V2DF_PCDOUBLE,
21095 V2DF_FTYPE_PCV2DF_V2DF,
21097 VOID_FTYPE_PV2SF_V4SF,
21098 VOID_FTYPE_PV4DI_V4DI,
21099 VOID_FTYPE_PV2DI_V2DI,
21100 VOID_FTYPE_PCHAR_V32QI,
21101 VOID_FTYPE_PCHAR_V16QI,
21102 VOID_FTYPE_PFLOAT_V8SF,
21103 VOID_FTYPE_PFLOAT_V4SF,
21104 VOID_FTYPE_PDOUBLE_V4DF,
21105 VOID_FTYPE_PDOUBLE_V2DF,
21107 VOID_FTYPE_PINT_INT,
21108 VOID_FTYPE_PV8SF_V8SF_V8SF,
21109 VOID_FTYPE_PV4DF_V4DF_V4DF,
21110 VOID_FTYPE_PV4SF_V4SF_V4SF,
21111 VOID_FTYPE_PV2DF_V2DF_V2DF
/* Builtin types.  Same RETURN_FTYPE_ARGS naming scheme as
   ix86_special_builtin_type above, with extra trailing markers that
   direct the expander:
     _PTEST     - ptest-style comparison returning int (vtestps etc.);
     _COUNT     - last operand is a shift count (per-builtin psllw/pslld...);
     _SWAP      - operands are swapped before emitting the insn, so e.g.
                  cmpgtps can reuse the LT pattern (see bdesc_args below);
     _VEC_MERGE - scalar op merged into the destination vector
                  (sqrtss/rsqrtss/rcpss);
     V2DI2TI / V1DI2DI - presumably mode-punned variants operating in
                  TImode/DImode — NOTE(review): confirm against the
                  expander, not visible in this chunk.  */
enum ix86_builtin_type
FLOAT128_FTYPE_FLOAT128,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
INT_FTYPE_V8SF_V8SF_PTEST,
INT_FTYPE_V4DI_V4DI_PTEST,
INT_FTYPE_V4DF_V4DF_PTEST,
INT_FTYPE_V4SF_V4SF_PTEST,
INT_FTYPE_V2DI_V2DI_PTEST,
INT_FTYPE_V2DF_V2DF_PTEST,
V4SF_FTYPE_V4SF_VEC_MERGE,
V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures.  */
V16QI_FTYPE_V16QI_V16QI,
V16QI_FTYPE_V8HI_V8HI,
V8QI_FTYPE_V8QI_V8QI,
V8QI_FTYPE_V4HI_V4HI,
V8HI_FTYPE_V8HI_V8HI,
V8HI_FTYPE_V8HI_V8HI_COUNT,
V8HI_FTYPE_V16QI_V16QI,
V8HI_FTYPE_V4SI_V4SI,
V8HI_FTYPE_V8HI_SI_COUNT,
V8SF_FTYPE_V8SF_V8SF,
V8SF_FTYPE_V8SF_V8SI,
V4SI_FTYPE_V4SI_V4SI,
V4SI_FTYPE_V4SI_V4SI_COUNT,
V4SI_FTYPE_V8HI_V8HI,
V4SI_FTYPE_V4SF_V4SF,
V4SI_FTYPE_V2DF_V2DF,
V4SI_FTYPE_V4SI_SI_COUNT,
V4HI_FTYPE_V4HI_V4HI,
V4HI_FTYPE_V4HI_V4HI_COUNT,
V4HI_FTYPE_V8QI_V8QI,
V4HI_FTYPE_V2SI_V2SI,
V4HI_FTYPE_V4HI_SI_COUNT,
V4DF_FTYPE_V4DF_V4DF,
V4DF_FTYPE_V4DF_V4DI,
V4SF_FTYPE_V4SF_V4SF,
V4SF_FTYPE_V4SF_V4SF_SWAP,
V4SF_FTYPE_V4SF_V4SI,
V4SF_FTYPE_V4SF_V2SI,
V4SF_FTYPE_V4SF_V2DF,
V4SF_FTYPE_V4SF_DI,
V4SF_FTYPE_V4SF_SI,
V2DI_FTYPE_V2DI_V2DI,
V2DI_FTYPE_V2DI_V2DI_COUNT,
V2DI_FTYPE_V16QI_V16QI,
V2DI_FTYPE_V4SI_V4SI,
V2DI_FTYPE_V2DI_V16QI,
V2DI_FTYPE_V2DF_V2DF,
V2DI_FTYPE_V2DI_SI_COUNT,
V2SI_FTYPE_V2SI_V2SI,
V2SI_FTYPE_V2SI_V2SI_COUNT,
V2SI_FTYPE_V4HI_V4HI,
V2SI_FTYPE_V2SF_V2SF,
V2SI_FTYPE_V2SI_SI_COUNT,
V2DF_FTYPE_V2DF_V2DF,
V2DF_FTYPE_V2DF_V2DF_SWAP,
V2DF_FTYPE_V2DF_V4SF,
V2DF_FTYPE_V2DF_V2DI,
V2DF_FTYPE_V2DF_DI,
V2DF_FTYPE_V2DF_SI,
V2SF_FTYPE_V2SF_V2SF,
V1DI_FTYPE_V1DI_V1DI,
V1DI_FTYPE_V1DI_V1DI_COUNT,
V1DI_FTYPE_V8QI_V8QI,
V1DI_FTYPE_V2SI_V2SI,
V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures (crc32 family and friends).  */
UINT64_FTYPE_UINT64_UINT64,
UINT_FTYPE_UINT_UINT,
UINT_FTYPE_UINT_USHORT,
UINT_FTYPE_UINT_UCHAR,
/* Vector-and-immediate signatures (shuffles, extracts, shifts by
   immediate).  */
V8HI_FTYPE_V8HI_INT,
V4SI_FTYPE_V4SI_INT,
V4HI_FTYPE_V4HI_INT,
V8SF_FTYPE_V8SF_INT,
V4SI_FTYPE_V8SI_INT,
V4SF_FTYPE_V8SF_INT,
V2DF_FTYPE_V4DF_INT,
V4DF_FTYPE_V4DF_INT,
V4SF_FTYPE_V4SF_INT,
V2DI_FTYPE_V2DI_INT,
V2DI2TI_FTYPE_V2DI_INT,
V2DF_FTYPE_V2DF_INT,
/* Three-operand vector signatures (blendv-style).  */
V16QI_FTYPE_V16QI_V16QI_V16QI,
V8SF_FTYPE_V8SF_V8SF_V8SF,
V4DF_FTYPE_V4DF_V4DF_V4DF,
V4SF_FTYPE_V4SF_V4SF_V4SF,
V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus an immediate.  */
V16QI_FTYPE_V16QI_V16QI_INT,
V8SI_FTYPE_V8SI_V8SI_INT,
V8SI_FTYPE_V8SI_V4SI_INT,
V8HI_FTYPE_V8HI_V8HI_INT,
V8SF_FTYPE_V8SF_V8SF_INT,
V8SF_FTYPE_V8SF_V4SF_INT,
V4SI_FTYPE_V4SI_V4SI_INT,
V4DF_FTYPE_V4DF_V4DF_INT,
V4DF_FTYPE_V4DF_V2DF_INT,
V4SF_FTYPE_V4SF_V4SF_INT,
V2DI_FTYPE_V2DI_V2DI_INT,
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
/* Special builtins with variable number of arguments.  Each entry is
   { isa-mask, insn code, user-visible name, builtin enum value,
     comparison rtx code (UNKNOWN when unused), signature flag cast from
     ix86_special_builtin_type }.
   A name of 0 means the builtin has no user-visible name — presumably it
   is only expanded internally; verify against the registration loop.  */
static const struct builtin_description bdesc_special_args[] =
/* MMX */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
/* SSE or 3DNow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
/* Builtins with variable number of arguments.  Entry layout matches
   bdesc_special_args: { isa-mask, insn code, name, builtin enum value,
   comparison rtx code, signature flag }.  The comparison field is a real
   rtx code (EQ, LT, UNGE, ...) for the cmp* builtins, which all share
   one maskcmp pattern; _SWAP signatures reverse the operands so e.g.
   cmpgtps is emitted as LT with swapped inputs.  */
static const struct builtin_description bdesc_args[] =
/* MMX */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
/* Saturating add/sub (ss = signed saturate, us = unsigned saturate).  */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
/* Bitwise logic.  */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
/* Element-wise comparisons producing all-ones/all-zeros masks.  */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
/* Interleaves and packs.  */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
/* Shifts: the *i forms take an immediate count (SI_COUNT), the others a
   vector count (_COUNT).  */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
/* 3DNow! */
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
/* 3DNow!A */
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
/* SSE */
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
/* Packed compares share one maskcmp pattern; the rtx code in the fifth
   field selects the condition, and _SWAP reverses the operands (so GT/GE
   become LT/LE with swapped inputs).  */
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21515 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
/* Scalar ops that merge their result into the destination vector.  */
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
/* SSE or 3DNow!A (MMX extensions) */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
/* Conversions; the tt variants truncate toward zero.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21565 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21617 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21629 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21631 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21648 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21664 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21673 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21705 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21707 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21711 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21714 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21715 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21717 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21719 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21721 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21722 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21800 /* SSE4.1 and SSE5 */
21801 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21802 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21803 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21804 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21806 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21807 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21808 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21811 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21813 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21814 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21815 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21818 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21819 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21820 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21821 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21825 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21947 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21951 enum multi_arg_type {
21961 MULTI_ARG_3_PERMPS,
21962 MULTI_ARG_3_PERMPD,
21969 MULTI_ARG_2_DI_IMM,
21970 MULTI_ARG_2_SI_IMM,
21971 MULTI_ARG_2_HI_IMM,
21972 MULTI_ARG_2_QI_IMM,
21973 MULTI_ARG_2_SF_CMP,
21974 MULTI_ARG_2_DF_CMP,
21975 MULTI_ARG_2_DI_CMP,
21976 MULTI_ARG_2_SI_CMP,
21977 MULTI_ARG_2_HI_CMP,
21978 MULTI_ARG_2_QI_CMP,
22001 static const struct builtin_description bdesc_multi_arg[] =
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22239 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22240 in the current target ISA to allow the user to compile particular modules
22241 with different target specific options that differ from the command line
22244 ix86_init_mmx_sse_builtins (void)
22246 const struct builtin_description * d;
22249 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22250 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22251 tree V1DI_type_node
22252 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22253 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22254 tree V2DI_type_node
22255 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22256 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22257 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22258 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22259 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22260 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22261 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22263 tree pchar_type_node = build_pointer_type (char_type_node);
22264 tree pcchar_type_node
22265 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22266 tree pfloat_type_node = build_pointer_type (float_type_node);
22267 tree pcfloat_type_node
22268 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22269 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22270 tree pcv2sf_type_node
22271 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22272 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22273 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22276 tree int_ftype_v4sf_v4sf
22277 = build_function_type_list (integer_type_node,
22278 V4SF_type_node, V4SF_type_node, NULL_TREE);
22279 tree v4si_ftype_v4sf_v4sf
22280 = build_function_type_list (V4SI_type_node,
22281 V4SF_type_node, V4SF_type_node, NULL_TREE);
22282 /* MMX/SSE/integer conversions. */
22283 tree int_ftype_v4sf
22284 = build_function_type_list (integer_type_node,
22285 V4SF_type_node, NULL_TREE);
22286 tree int64_ftype_v4sf
22287 = build_function_type_list (long_long_integer_type_node,
22288 V4SF_type_node, NULL_TREE);
22289 tree int_ftype_v8qi
22290 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22291 tree v4sf_ftype_v4sf_int
22292 = build_function_type_list (V4SF_type_node,
22293 V4SF_type_node, integer_type_node, NULL_TREE);
22294 tree v4sf_ftype_v4sf_int64
22295 = build_function_type_list (V4SF_type_node,
22296 V4SF_type_node, long_long_integer_type_node,
22298 tree v4sf_ftype_v4sf_v2si
22299 = build_function_type_list (V4SF_type_node,
22300 V4SF_type_node, V2SI_type_node, NULL_TREE);
22302 /* Miscellaneous. */
22303 tree v8qi_ftype_v4hi_v4hi
22304 = build_function_type_list (V8QI_type_node,
22305 V4HI_type_node, V4HI_type_node, NULL_TREE);
22306 tree v4hi_ftype_v2si_v2si
22307 = build_function_type_list (V4HI_type_node,
22308 V2SI_type_node, V2SI_type_node, NULL_TREE);
22309 tree v4sf_ftype_v4sf_v4sf_int
22310 = build_function_type_list (V4SF_type_node,
22311 V4SF_type_node, V4SF_type_node,
22312 integer_type_node, NULL_TREE);
22313 tree v2si_ftype_v4hi_v4hi
22314 = build_function_type_list (V2SI_type_node,
22315 V4HI_type_node, V4HI_type_node, NULL_TREE);
22316 tree v4hi_ftype_v4hi_int
22317 = build_function_type_list (V4HI_type_node,
22318 V4HI_type_node, integer_type_node, NULL_TREE);
22319 tree v2si_ftype_v2si_int
22320 = build_function_type_list (V2SI_type_node,
22321 V2SI_type_node, integer_type_node, NULL_TREE);
22322 tree v1di_ftype_v1di_int
22323 = build_function_type_list (V1DI_type_node,
22324 V1DI_type_node, integer_type_node, NULL_TREE);
22326 tree void_ftype_void
22327 = build_function_type (void_type_node, void_list_node);
22328 tree void_ftype_unsigned
22329 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22330 tree void_ftype_unsigned_unsigned
22331 = build_function_type_list (void_type_node, unsigned_type_node,
22332 unsigned_type_node, NULL_TREE);
22333 tree void_ftype_pcvoid_unsigned_unsigned
22334 = build_function_type_list (void_type_node, const_ptr_type_node,
22335 unsigned_type_node, unsigned_type_node,
22337 tree unsigned_ftype_void
22338 = build_function_type (unsigned_type_node, void_list_node);
22339 tree v2si_ftype_v4sf
22340 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22341 /* Loads/stores. */
22342 tree void_ftype_v8qi_v8qi_pchar
22343 = build_function_type_list (void_type_node,
22344 V8QI_type_node, V8QI_type_node,
22345 pchar_type_node, NULL_TREE);
22346 tree v4sf_ftype_pcfloat
22347 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22348 tree v4sf_ftype_v4sf_pcv2sf
22349 = build_function_type_list (V4SF_type_node,
22350 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22351 tree void_ftype_pv2sf_v4sf
22352 = build_function_type_list (void_type_node,
22353 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22354 tree void_ftype_pfloat_v4sf
22355 = build_function_type_list (void_type_node,
22356 pfloat_type_node, V4SF_type_node, NULL_TREE);
22357 tree void_ftype_pdi_di
22358 = build_function_type_list (void_type_node,
22359 pdi_type_node, long_long_unsigned_type_node,
22361 tree void_ftype_pv2di_v2di
22362 = build_function_type_list (void_type_node,
22363 pv2di_type_node, V2DI_type_node, NULL_TREE);
22364 /* Normal vector unops. */
22365 tree v4sf_ftype_v4sf
22366 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22367 tree v16qi_ftype_v16qi
22368 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22369 tree v8hi_ftype_v8hi
22370 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22371 tree v4si_ftype_v4si
22372 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22373 tree v8qi_ftype_v8qi
22374 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22375 tree v4hi_ftype_v4hi
22376 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22378 /* Normal vector binops. */
22379 tree v4sf_ftype_v4sf_v4sf
22380 = build_function_type_list (V4SF_type_node,
22381 V4SF_type_node, V4SF_type_node, NULL_TREE);
22382 tree v8qi_ftype_v8qi_v8qi
22383 = build_function_type_list (V8QI_type_node,
22384 V8QI_type_node, V8QI_type_node, NULL_TREE);
22385 tree v4hi_ftype_v4hi_v4hi
22386 = build_function_type_list (V4HI_type_node,
22387 V4HI_type_node, V4HI_type_node, NULL_TREE);
22388 tree v2si_ftype_v2si_v2si
22389 = build_function_type_list (V2SI_type_node,
22390 V2SI_type_node, V2SI_type_node, NULL_TREE);
22391 tree v1di_ftype_v1di_v1di
22392 = build_function_type_list (V1DI_type_node,
22393 V1DI_type_node, V1DI_type_node, NULL_TREE);
22394 tree v1di_ftype_v1di_v1di_int
22395 = build_function_type_list (V1DI_type_node,
22396 V1DI_type_node, V1DI_type_node,
22397 integer_type_node, NULL_TREE);
22398 tree v2si_ftype_v2sf
22399 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22400 tree v2sf_ftype_v2si
22401 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22402 tree v2si_ftype_v2si
22403 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22404 tree v2sf_ftype_v2sf
22405 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22406 tree v2sf_ftype_v2sf_v2sf
22407 = build_function_type_list (V2SF_type_node,
22408 V2SF_type_node, V2SF_type_node, NULL_TREE);
22409 tree v2si_ftype_v2sf_v2sf
22410 = build_function_type_list (V2SI_type_node,
22411 V2SF_type_node, V2SF_type_node, NULL_TREE);
22412 tree pint_type_node = build_pointer_type (integer_type_node);
22413 tree pdouble_type_node = build_pointer_type (double_type_node);
22414 tree pcdouble_type_node = build_pointer_type (
22415 build_type_variant (double_type_node, 1, 0));
22416 tree int_ftype_v2df_v2df
22417 = build_function_type_list (integer_type_node,
22418 V2DF_type_node, V2DF_type_node, NULL_TREE);
22420 tree void_ftype_pcvoid
22421 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22422 tree v4sf_ftype_v4si
22423 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22424 tree v4si_ftype_v4sf
22425 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22426 tree v2df_ftype_v4si
22427 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22428 tree v4si_ftype_v2df
22429 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22430 tree v4si_ftype_v2df_v2df
22431 = build_function_type_list (V4SI_type_node,
22432 V2DF_type_node, V2DF_type_node, NULL_TREE);
22433 tree v2si_ftype_v2df
22434 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22435 tree v4sf_ftype_v2df
22436 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22437 tree v2df_ftype_v2si
22438 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22439 tree v2df_ftype_v4sf
22440 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22441 tree int_ftype_v2df
22442 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22443 tree int64_ftype_v2df
22444 = build_function_type_list (long_long_integer_type_node,
22445 V2DF_type_node, NULL_TREE);
22446 tree v2df_ftype_v2df_int
22447 = build_function_type_list (V2DF_type_node,
22448 V2DF_type_node, integer_type_node, NULL_TREE);
22449 tree v2df_ftype_v2df_int64
22450 = build_function_type_list (V2DF_type_node,
22451 V2DF_type_node, long_long_integer_type_node,
22453 tree v4sf_ftype_v4sf_v2df
22454 = build_function_type_list (V4SF_type_node,
22455 V4SF_type_node, V2DF_type_node, NULL_TREE);
22456 tree v2df_ftype_v2df_v4sf
22457 = build_function_type_list (V2DF_type_node,
22458 V2DF_type_node, V4SF_type_node, NULL_TREE);
22459 tree v2df_ftype_v2df_v2df_int
22460 = build_function_type_list (V2DF_type_node,
22461 V2DF_type_node, V2DF_type_node,
22464 tree v2df_ftype_v2df_pcdouble
22465 = build_function_type_list (V2DF_type_node,
22466 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22467 tree void_ftype_pdouble_v2df
22468 = build_function_type_list (void_type_node,
22469 pdouble_type_node, V2DF_type_node, NULL_TREE);
22470 tree void_ftype_pint_int
22471 = build_function_type_list (void_type_node,
22472 pint_type_node, integer_type_node, NULL_TREE);
22473 tree void_ftype_v16qi_v16qi_pchar
22474 = build_function_type_list (void_type_node,
22475 V16QI_type_node, V16QI_type_node,
22476 pchar_type_node, NULL_TREE);
22477 tree v2df_ftype_pcdouble
22478 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22479 tree v2df_ftype_v2df_v2df
22480 = build_function_type_list (V2DF_type_node,
22481 V2DF_type_node, V2DF_type_node, NULL_TREE);
22482 tree v16qi_ftype_v16qi_v16qi
22483 = build_function_type_list (V16QI_type_node,
22484 V16QI_type_node, V16QI_type_node, NULL_TREE);
22485 tree v8hi_ftype_v8hi_v8hi
22486 = build_function_type_list (V8HI_type_node,
22487 V8HI_type_node, V8HI_type_node, NULL_TREE);
22488 tree v4si_ftype_v4si_v4si
22489 = build_function_type_list (V4SI_type_node,
22490 V4SI_type_node, V4SI_type_node, NULL_TREE);
22491 tree v2di_ftype_v2di_v2di
22492 = build_function_type_list (V2DI_type_node,
22493 V2DI_type_node, V2DI_type_node, NULL_TREE);
22494 tree v2di_ftype_v2df_v2df
22495 = build_function_type_list (V2DI_type_node,
22496 V2DF_type_node, V2DF_type_node, NULL_TREE);
22497 tree v2df_ftype_v2df
22498 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22499 tree v2di_ftype_v2di_int
22500 = build_function_type_list (V2DI_type_node,
22501 V2DI_type_node, integer_type_node, NULL_TREE);
22502 tree v2di_ftype_v2di_v2di_int
22503 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22504 V2DI_type_node, integer_type_node, NULL_TREE);
22505 tree v4si_ftype_v4si_int
22506 = build_function_type_list (V4SI_type_node,
22507 V4SI_type_node, integer_type_node, NULL_TREE);
22508 tree v8hi_ftype_v8hi_int
22509 = build_function_type_list (V8HI_type_node,
22510 V8HI_type_node, integer_type_node, NULL_TREE);
22511 tree v4si_ftype_v8hi_v8hi
22512 = build_function_type_list (V4SI_type_node,
22513 V8HI_type_node, V8HI_type_node, NULL_TREE);
22514 tree v1di_ftype_v8qi_v8qi
22515 = build_function_type_list (V1DI_type_node,
22516 V8QI_type_node, V8QI_type_node, NULL_TREE);
22517 tree v1di_ftype_v2si_v2si
22518 = build_function_type_list (V1DI_type_node,
22519 V2SI_type_node, V2SI_type_node, NULL_TREE);
22520 tree v2di_ftype_v16qi_v16qi
22521 = build_function_type_list (V2DI_type_node,
22522 V16QI_type_node, V16QI_type_node, NULL_TREE);
22523 tree v2di_ftype_v4si_v4si
22524 = build_function_type_list (V2DI_type_node,
22525 V4SI_type_node, V4SI_type_node, NULL_TREE);
22526 tree int_ftype_v16qi
22527 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22528 tree v16qi_ftype_pcchar
22529 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22530 tree void_ftype_pchar_v16qi
22531 = build_function_type_list (void_type_node,
22532 pchar_type_node, V16QI_type_node, NULL_TREE);
22534 tree v2di_ftype_v2di_unsigned_unsigned
22535 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22536 unsigned_type_node, unsigned_type_node,
22538 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22539 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22540 unsigned_type_node, unsigned_type_node,
22542 tree v2di_ftype_v2di_v16qi
22543 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22545 tree v2df_ftype_v2df_v2df_v2df
22546 = build_function_type_list (V2DF_type_node,
22547 V2DF_type_node, V2DF_type_node,
22548 V2DF_type_node, NULL_TREE);
22549 tree v4sf_ftype_v4sf_v4sf_v4sf
22550 = build_function_type_list (V4SF_type_node,
22551 V4SF_type_node, V4SF_type_node,
22552 V4SF_type_node, NULL_TREE);
22553 tree v8hi_ftype_v16qi
22554 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22556 tree v4si_ftype_v16qi
22557 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22559 tree v2di_ftype_v16qi
22560 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22562 tree v4si_ftype_v8hi
22563 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22565 tree v2di_ftype_v8hi
22566 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22568 tree v2di_ftype_v4si
22569 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22571 tree v2di_ftype_pv2di
22572 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22574 tree v16qi_ftype_v16qi_v16qi_int
22575 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22576 V16QI_type_node, integer_type_node,
22578 tree v16qi_ftype_v16qi_v16qi_v16qi
22579 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22580 V16QI_type_node, V16QI_type_node,
22582 tree v8hi_ftype_v8hi_v8hi_int
22583 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22584 V8HI_type_node, integer_type_node,
22586 tree v4si_ftype_v4si_v4si_int
22587 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22588 V4SI_type_node, integer_type_node,
22590 tree int_ftype_v2di_v2di
22591 = build_function_type_list (integer_type_node,
22592 V2DI_type_node, V2DI_type_node,
22594 tree int_ftype_v16qi_int_v16qi_int_int
22595 = build_function_type_list (integer_type_node,
22602 tree v16qi_ftype_v16qi_int_v16qi_int_int
22603 = build_function_type_list (V16QI_type_node,
22610 tree int_ftype_v16qi_v16qi_int
22611 = build_function_type_list (integer_type_node,
22617 /* SSE5 instructions */
22618 tree v2di_ftype_v2di_v2di_v2di
22619 = build_function_type_list (V2DI_type_node,
22625 tree v4si_ftype_v4si_v4si_v4si
22626 = build_function_type_list (V4SI_type_node,
22632 tree v4si_ftype_v4si_v4si_v2di
22633 = build_function_type_list (V4SI_type_node,
22639 tree v8hi_ftype_v8hi_v8hi_v8hi
22640 = build_function_type_list (V8HI_type_node,
22646 tree v8hi_ftype_v8hi_v8hi_v4si
22647 = build_function_type_list (V8HI_type_node,
22653 tree v2df_ftype_v2df_v2df_v16qi
22654 = build_function_type_list (V2DF_type_node,
22660 tree v4sf_ftype_v4sf_v4sf_v16qi
22661 = build_function_type_list (V4SF_type_node,
22667 tree v2di_ftype_v2di_si
22668 = build_function_type_list (V2DI_type_node,
22673 tree v4si_ftype_v4si_si
22674 = build_function_type_list (V4SI_type_node,
22679 tree v8hi_ftype_v8hi_si
22680 = build_function_type_list (V8HI_type_node,
22685 tree v16qi_ftype_v16qi_si
22686 = build_function_type_list (V16QI_type_node,
22690 tree v4sf_ftype_v4hi
22691 = build_function_type_list (V4SF_type_node,
22695 tree v4hi_ftype_v4sf
22696 = build_function_type_list (V4HI_type_node,
22700 tree v2di_ftype_v2di
22701 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22703 tree v16qi_ftype_v8hi_v8hi
22704 = build_function_type_list (V16QI_type_node,
22705 V8HI_type_node, V8HI_type_node,
22707 tree v8hi_ftype_v4si_v4si
22708 = build_function_type_list (V8HI_type_node,
22709 V4SI_type_node, V4SI_type_node,
22711 tree v8hi_ftype_v16qi_v16qi
22712 = build_function_type_list (V8HI_type_node,
22713 V16QI_type_node, V16QI_type_node,
22715 tree v4hi_ftype_v8qi_v8qi
22716 = build_function_type_list (V4HI_type_node,
22717 V8QI_type_node, V8QI_type_node,
22719 tree unsigned_ftype_unsigned_uchar
22720 = build_function_type_list (unsigned_type_node,
22721 unsigned_type_node,
22722 unsigned_char_type_node,
22724 tree unsigned_ftype_unsigned_ushort
22725 = build_function_type_list (unsigned_type_node,
22726 unsigned_type_node,
22727 short_unsigned_type_node,
22729 tree unsigned_ftype_unsigned_unsigned
22730 = build_function_type_list (unsigned_type_node,
22731 unsigned_type_node,
22732 unsigned_type_node,
22734 tree uint64_ftype_uint64_uint64
22735 = build_function_type_list (long_long_unsigned_type_node,
22736 long_long_unsigned_type_node,
22737 long_long_unsigned_type_node,
22739 tree float_ftype_float
22740 = build_function_type_list (float_type_node,
22745 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22747 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22749 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22751 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22753 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22755 tree v8sf_ftype_v8sf
22756 = build_function_type_list (V8SF_type_node,
22759 tree v8si_ftype_v8sf
22760 = build_function_type_list (V8SI_type_node,
22763 tree v8sf_ftype_v8si
22764 = build_function_type_list (V8SF_type_node,
22767 tree v4si_ftype_v4df
22768 = build_function_type_list (V4SI_type_node,
22771 tree v4df_ftype_v4df
22772 = build_function_type_list (V4DF_type_node,
22775 tree v4df_ftype_v4si
22776 = build_function_type_list (V4DF_type_node,
22779 tree v4df_ftype_v4sf
22780 = build_function_type_list (V4DF_type_node,
22783 tree v4sf_ftype_v4df
22784 = build_function_type_list (V4SF_type_node,
22787 tree v8sf_ftype_v8sf_v8sf
22788 = build_function_type_list (V8SF_type_node,
22789 V8SF_type_node, V8SF_type_node,
22791 tree v4df_ftype_v4df_v4df
22792 = build_function_type_list (V4DF_type_node,
22793 V4DF_type_node, V4DF_type_node,
22795 tree v8sf_ftype_v8sf_int
22796 = build_function_type_list (V8SF_type_node,
22797 V8SF_type_node, integer_type_node,
22799 tree v4si_ftype_v8si_int
22800 = build_function_type_list (V4SI_type_node,
22801 V8SI_type_node, integer_type_node,
22803 tree v4df_ftype_v4df_int
22804 = build_function_type_list (V4DF_type_node,
22805 V4DF_type_node, integer_type_node,
22807 tree v4sf_ftype_v8sf_int
22808 = build_function_type_list (V4SF_type_node,
22809 V8SF_type_node, integer_type_node,
22811 tree v2df_ftype_v4df_int
22812 = build_function_type_list (V2DF_type_node,
22813 V4DF_type_node, integer_type_node,
22815 tree v8sf_ftype_v8sf_v8sf_int
22816 = build_function_type_list (V8SF_type_node,
22817 V8SF_type_node, V8SF_type_node,
22820 tree v8sf_ftype_v8sf_v8sf_v8sf
22821 = build_function_type_list (V8SF_type_node,
22822 V8SF_type_node, V8SF_type_node,
22825 tree v4df_ftype_v4df_v4df_v4df
22826 = build_function_type_list (V4DF_type_node,
22827 V4DF_type_node, V4DF_type_node,
22830 tree v8si_ftype_v8si_v8si_int
22831 = build_function_type_list (V8SI_type_node,
22832 V8SI_type_node, V8SI_type_node,
22835 tree v4df_ftype_v4df_v4df_int
22836 = build_function_type_list (V4DF_type_node,
22837 V4DF_type_node, V4DF_type_node,
22840 tree v8sf_ftype_pcfloat
22841 = build_function_type_list (V8SF_type_node,
22844 tree v4df_ftype_pcdouble
22845 = build_function_type_list (V4DF_type_node,
22846 pcdouble_type_node,
22848 tree pcv4sf_type_node
22849 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22850 tree pcv2df_type_node
22851 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22852 tree v8sf_ftype_pcv4sf
22853 = build_function_type_list (V8SF_type_node,
22856 tree v4df_ftype_pcv2df
22857 = build_function_type_list (V4DF_type_node,
22860 tree v32qi_ftype_pcchar
22861 = build_function_type_list (V32QI_type_node,
22864 tree void_ftype_pchar_v32qi
22865 = build_function_type_list (void_type_node,
22866 pchar_type_node, V32QI_type_node,
22868 tree v8si_ftype_v8si_v4si_int
22869 = build_function_type_list (V8SI_type_node,
22870 V8SI_type_node, V4SI_type_node,
22873 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22874 tree void_ftype_pv4di_v4di
22875 = build_function_type_list (void_type_node,
22876 pv4di_type_node, V4DI_type_node,
22878 tree v8sf_ftype_v8sf_v4sf_int
22879 = build_function_type_list (V8SF_type_node,
22880 V8SF_type_node, V4SF_type_node,
22883 tree v4df_ftype_v4df_v2df_int
22884 = build_function_type_list (V4DF_type_node,
22885 V4DF_type_node, V2DF_type_node,
22888 tree void_ftype_pfloat_v8sf
22889 = build_function_type_list (void_type_node,
22890 pfloat_type_node, V8SF_type_node,
22892 tree void_ftype_pdouble_v4df
22893 = build_function_type_list (void_type_node,
22894 pdouble_type_node, V4DF_type_node,
22896 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22897 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22898 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22899 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22900 tree pcv8sf_type_node
22901 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22902 tree pcv4df_type_node
22903 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22904 tree v8sf_ftype_pcv8sf_v8sf
22905 = build_function_type_list (V8SF_type_node,
22906 pcv8sf_type_node, V8SF_type_node,
22908 tree v4df_ftype_pcv4df_v4df
22909 = build_function_type_list (V4DF_type_node,
22910 pcv4df_type_node, V4DF_type_node,
22912 tree v4sf_ftype_pcv4sf_v4sf
22913 = build_function_type_list (V4SF_type_node,
22914 pcv4sf_type_node, V4SF_type_node,
22916 tree v2df_ftype_pcv2df_v2df
22917 = build_function_type_list (V2DF_type_node,
22918 pcv2df_type_node, V2DF_type_node,
22920 tree void_ftype_pv8sf_v8sf_v8sf
22921 = build_function_type_list (void_type_node,
22922 pv8sf_type_node, V8SF_type_node,
22925 tree void_ftype_pv4df_v4df_v4df
22926 = build_function_type_list (void_type_node,
22927 pv4df_type_node, V4DF_type_node,
22930 tree void_ftype_pv4sf_v4sf_v4sf
22931 = build_function_type_list (void_type_node,
22932 pv4sf_type_node, V4SF_type_node,
22935 tree void_ftype_pv2df_v2df_v2df
22936 = build_function_type_list (void_type_node,
22937 pv2df_type_node, V2DF_type_node,
22940 tree v4df_ftype_v2df
22941 = build_function_type_list (V4DF_type_node,
22944 tree v8sf_ftype_v4sf
22945 = build_function_type_list (V8SF_type_node,
22948 tree v8si_ftype_v4si
22949 = build_function_type_list (V8SI_type_node,
22952 tree v2df_ftype_v4df
22953 = build_function_type_list (V2DF_type_node,
22956 tree v4sf_ftype_v8sf
22957 = build_function_type_list (V4SF_type_node,
22960 tree v4si_ftype_v8si
22961 = build_function_type_list (V4SI_type_node,
22964 tree int_ftype_v4df
22965 = build_function_type_list (integer_type_node,
22968 tree int_ftype_v8sf
22969 = build_function_type_list (integer_type_node,
22972 tree int_ftype_v8sf_v8sf
22973 = build_function_type_list (integer_type_node,
22974 V8SF_type_node, V8SF_type_node,
22976 tree int_ftype_v4di_v4di
22977 = build_function_type_list (integer_type_node,
22978 V4DI_type_node, V4DI_type_node,
22980 tree int_ftype_v4df_v4df
22981 = build_function_type_list (integer_type_node,
22982 V4DF_type_node, V4DF_type_node,
22984 tree v8sf_ftype_v8sf_v8si
22985 = build_function_type_list (V8SF_type_node,
22986 V8SF_type_node, V8SI_type_node,
22988 tree v4df_ftype_v4df_v4di
22989 = build_function_type_list (V4DF_type_node,
22990 V4DF_type_node, V4DI_type_node,
22992 tree v4sf_ftype_v4sf_v4si
22993 = build_function_type_list (V4SF_type_node,
22994 V4SF_type_node, V4SI_type_node, NULL_TREE);
22995 tree v2df_ftype_v2df_v2di
22996 = build_function_type_list (V2DF_type_node,
22997 V2DF_type_node, V2DI_type_node, NULL_TREE);
23001 /* Add all special builtins with variable number of operands. */
23002 for (i = 0, d = bdesc_special_args;
23003 i < ARRAY_SIZE (bdesc_special_args);
23011 switch ((enum ix86_special_builtin_type) d->flag)
23013 case VOID_FTYPE_VOID:
23014 type = void_ftype_void;
23016 case V32QI_FTYPE_PCCHAR:
23017 type = v32qi_ftype_pcchar;
23019 case V16QI_FTYPE_PCCHAR:
23020 type = v16qi_ftype_pcchar;
23022 case V8SF_FTYPE_PCV4SF:
23023 type = v8sf_ftype_pcv4sf;
23025 case V8SF_FTYPE_PCFLOAT:
23026 type = v8sf_ftype_pcfloat;
23028 case V4DF_FTYPE_PCV2DF:
23029 type = v4df_ftype_pcv2df;
23031 case V4DF_FTYPE_PCDOUBLE:
23032 type = v4df_ftype_pcdouble;
23034 case V4SF_FTYPE_PCFLOAT:
23035 type = v4sf_ftype_pcfloat;
23037 case V2DI_FTYPE_PV2DI:
23038 type = v2di_ftype_pv2di;
23040 case V2DF_FTYPE_PCDOUBLE:
23041 type = v2df_ftype_pcdouble;
23043 case V8SF_FTYPE_PCV8SF_V8SF:
23044 type = v8sf_ftype_pcv8sf_v8sf;
23046 case V4DF_FTYPE_PCV4DF_V4DF:
23047 type = v4df_ftype_pcv4df_v4df;
23049 case V4SF_FTYPE_V4SF_PCV2SF:
23050 type = v4sf_ftype_v4sf_pcv2sf;
23052 case V4SF_FTYPE_PCV4SF_V4SF:
23053 type = v4sf_ftype_pcv4sf_v4sf;
23055 case V2DF_FTYPE_V2DF_PCDOUBLE:
23056 type = v2df_ftype_v2df_pcdouble;
23058 case V2DF_FTYPE_PCV2DF_V2DF:
23059 type = v2df_ftype_pcv2df_v2df;
23061 case VOID_FTYPE_PV2SF_V4SF:
23062 type = void_ftype_pv2sf_v4sf;
23064 case VOID_FTYPE_PV4DI_V4DI:
23065 type = void_ftype_pv4di_v4di;
23067 case VOID_FTYPE_PV2DI_V2DI:
23068 type = void_ftype_pv2di_v2di;
23070 case VOID_FTYPE_PCHAR_V32QI:
23071 type = void_ftype_pchar_v32qi;
23073 case VOID_FTYPE_PCHAR_V16QI:
23074 type = void_ftype_pchar_v16qi;
23076 case VOID_FTYPE_PFLOAT_V8SF:
23077 type = void_ftype_pfloat_v8sf;
23079 case VOID_FTYPE_PFLOAT_V4SF:
23080 type = void_ftype_pfloat_v4sf;
23082 case VOID_FTYPE_PDOUBLE_V4DF:
23083 type = void_ftype_pdouble_v4df;
23085 case VOID_FTYPE_PDOUBLE_V2DF:
23086 type = void_ftype_pdouble_v2df;
23088 case VOID_FTYPE_PDI_DI:
23089 type = void_ftype_pdi_di;
23091 case VOID_FTYPE_PINT_INT:
23092 type = void_ftype_pint_int;
23094 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23095 type = void_ftype_pv8sf_v8sf_v8sf;
23097 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23098 type = void_ftype_pv4df_v4df_v4df;
23100 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23101 type = void_ftype_pv4sf_v4sf_v4sf;
23103 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23104 type = void_ftype_pv2df_v2df_v2df;
23107 gcc_unreachable ();
23110 def_builtin (d->mask, d->name, type, d->code);
23113 /* Add all builtins with variable number of operands. */
23114 for (i = 0, d = bdesc_args;
23115 i < ARRAY_SIZE (bdesc_args);
23123 switch ((enum ix86_builtin_type) d->flag)
23125 case FLOAT_FTYPE_FLOAT:
23126 type = float_ftype_float;
23128 case INT_FTYPE_V8SF_V8SF_PTEST:
23129 type = int_ftype_v8sf_v8sf;
23131 case INT_FTYPE_V4DI_V4DI_PTEST:
23132 type = int_ftype_v4di_v4di;
23134 case INT_FTYPE_V4DF_V4DF_PTEST:
23135 type = int_ftype_v4df_v4df;
23137 case INT_FTYPE_V4SF_V4SF_PTEST:
23138 type = int_ftype_v4sf_v4sf;
23140 case INT_FTYPE_V2DI_V2DI_PTEST:
23141 type = int_ftype_v2di_v2di;
23143 case INT_FTYPE_V2DF_V2DF_PTEST:
23144 type = int_ftype_v2df_v2df;
23146 case INT64_FTYPE_V4SF:
23147 type = int64_ftype_v4sf;
23149 case INT64_FTYPE_V2DF:
23150 type = int64_ftype_v2df;
23152 case INT_FTYPE_V16QI:
23153 type = int_ftype_v16qi;
23155 case INT_FTYPE_V8QI:
23156 type = int_ftype_v8qi;
23158 case INT_FTYPE_V8SF:
23159 type = int_ftype_v8sf;
23161 case INT_FTYPE_V4DF:
23162 type = int_ftype_v4df;
23164 case INT_FTYPE_V4SF:
23165 type = int_ftype_v4sf;
23167 case INT_FTYPE_V2DF:
23168 type = int_ftype_v2df;
23170 case V16QI_FTYPE_V16QI:
23171 type = v16qi_ftype_v16qi;
23173 case V8SI_FTYPE_V8SF:
23174 type = v8si_ftype_v8sf;
23176 case V8SI_FTYPE_V4SI:
23177 type = v8si_ftype_v4si;
23179 case V8HI_FTYPE_V8HI:
23180 type = v8hi_ftype_v8hi;
23182 case V8HI_FTYPE_V16QI:
23183 type = v8hi_ftype_v16qi;
23185 case V8QI_FTYPE_V8QI:
23186 type = v8qi_ftype_v8qi;
23188 case V8SF_FTYPE_V8SF:
23189 type = v8sf_ftype_v8sf;
23191 case V8SF_FTYPE_V8SI:
23192 type = v8sf_ftype_v8si;
23194 case V8SF_FTYPE_V4SF:
23195 type = v8sf_ftype_v4sf;
23197 case V4SI_FTYPE_V4DF:
23198 type = v4si_ftype_v4df;
23200 case V4SI_FTYPE_V4SI:
23201 type = v4si_ftype_v4si;
23203 case V4SI_FTYPE_V16QI:
23204 type = v4si_ftype_v16qi;
23206 case V4SI_FTYPE_V8SI:
23207 type = v4si_ftype_v8si;
23209 case V4SI_FTYPE_V8HI:
23210 type = v4si_ftype_v8hi;
23212 case V4SI_FTYPE_V4SF:
23213 type = v4si_ftype_v4sf;
23215 case V4SI_FTYPE_V2DF:
23216 type = v4si_ftype_v2df;
23218 case V4HI_FTYPE_V4HI:
23219 type = v4hi_ftype_v4hi;
23221 case V4DF_FTYPE_V4DF:
23222 type = v4df_ftype_v4df;
23224 case V4DF_FTYPE_V4SI:
23225 type = v4df_ftype_v4si;
23227 case V4DF_FTYPE_V4SF:
23228 type = v4df_ftype_v4sf;
23230 case V4DF_FTYPE_V2DF:
23231 type = v4df_ftype_v2df;
23233 case V4SF_FTYPE_V4SF:
23234 case V4SF_FTYPE_V4SF_VEC_MERGE:
23235 type = v4sf_ftype_v4sf;
23237 case V4SF_FTYPE_V8SF:
23238 type = v4sf_ftype_v8sf;
23240 case V4SF_FTYPE_V4SI:
23241 type = v4sf_ftype_v4si;
23243 case V4SF_FTYPE_V4DF:
23244 type = v4sf_ftype_v4df;
23246 case V4SF_FTYPE_V2DF:
23247 type = v4sf_ftype_v2df;
23249 case V2DI_FTYPE_V2DI:
23250 type = v2di_ftype_v2di;
23252 case V2DI_FTYPE_V16QI:
23253 type = v2di_ftype_v16qi;
23255 case V2DI_FTYPE_V8HI:
23256 type = v2di_ftype_v8hi;
23258 case V2DI_FTYPE_V4SI:
23259 type = v2di_ftype_v4si;
23261 case V2SI_FTYPE_V2SI:
23262 type = v2si_ftype_v2si;
23264 case V2SI_FTYPE_V4SF:
23265 type = v2si_ftype_v4sf;
23267 case V2SI_FTYPE_V2DF:
23268 type = v2si_ftype_v2df;
23270 case V2SI_FTYPE_V2SF:
23271 type = v2si_ftype_v2sf;
23273 case V2DF_FTYPE_V4DF:
23274 type = v2df_ftype_v4df;
23276 case V2DF_FTYPE_V4SF:
23277 type = v2df_ftype_v4sf;
23279 case V2DF_FTYPE_V2DF:
23280 case V2DF_FTYPE_V2DF_VEC_MERGE:
23281 type = v2df_ftype_v2df;
23283 case V2DF_FTYPE_V2SI:
23284 type = v2df_ftype_v2si;
23286 case V2DF_FTYPE_V4SI:
23287 type = v2df_ftype_v4si;
23289 case V2SF_FTYPE_V2SF:
23290 type = v2sf_ftype_v2sf;
23292 case V2SF_FTYPE_V2SI:
23293 type = v2sf_ftype_v2si;
23295 case V16QI_FTYPE_V16QI_V16QI:
23296 type = v16qi_ftype_v16qi_v16qi;
23298 case V16QI_FTYPE_V8HI_V8HI:
23299 type = v16qi_ftype_v8hi_v8hi;
23301 case V8QI_FTYPE_V8QI_V8QI:
23302 type = v8qi_ftype_v8qi_v8qi;
23304 case V8QI_FTYPE_V4HI_V4HI:
23305 type = v8qi_ftype_v4hi_v4hi;
23307 case V8HI_FTYPE_V8HI_V8HI:
23308 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23309 type = v8hi_ftype_v8hi_v8hi;
23311 case V8HI_FTYPE_V16QI_V16QI:
23312 type = v8hi_ftype_v16qi_v16qi;
23314 case V8HI_FTYPE_V4SI_V4SI:
23315 type = v8hi_ftype_v4si_v4si;
23317 case V8HI_FTYPE_V8HI_SI_COUNT:
23318 type = v8hi_ftype_v8hi_int;
23320 case V8SF_FTYPE_V8SF_V8SF:
23321 type = v8sf_ftype_v8sf_v8sf;
23323 case V8SF_FTYPE_V8SF_V8SI:
23324 type = v8sf_ftype_v8sf_v8si;
23326 case V4SI_FTYPE_V4SI_V4SI:
23327 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23328 type = v4si_ftype_v4si_v4si;
23330 case V4SI_FTYPE_V8HI_V8HI:
23331 type = v4si_ftype_v8hi_v8hi;
23333 case V4SI_FTYPE_V4SF_V4SF:
23334 type = v4si_ftype_v4sf_v4sf;
23336 case V4SI_FTYPE_V2DF_V2DF:
23337 type = v4si_ftype_v2df_v2df;
23339 case V4SI_FTYPE_V4SI_SI_COUNT:
23340 type = v4si_ftype_v4si_int;
23342 case V4HI_FTYPE_V4HI_V4HI:
23343 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23344 type = v4hi_ftype_v4hi_v4hi;
23346 case V4HI_FTYPE_V8QI_V8QI:
23347 type = v4hi_ftype_v8qi_v8qi;
23349 case V4HI_FTYPE_V2SI_V2SI:
23350 type = v4hi_ftype_v2si_v2si;
23352 case V4HI_FTYPE_V4HI_SI_COUNT:
23353 type = v4hi_ftype_v4hi_int;
23355 case V4DF_FTYPE_V4DF_V4DF:
23356 type = v4df_ftype_v4df_v4df;
23358 case V4DF_FTYPE_V4DF_V4DI:
23359 type = v4df_ftype_v4df_v4di;
23361 case V4SF_FTYPE_V4SF_V4SF:
23362 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23363 type = v4sf_ftype_v4sf_v4sf;
23365 case V4SF_FTYPE_V4SF_V4SI:
23366 type = v4sf_ftype_v4sf_v4si;
23368 case V4SF_FTYPE_V4SF_V2SI:
23369 type = v4sf_ftype_v4sf_v2si;
23371 case V4SF_FTYPE_V4SF_V2DF:
23372 type = v4sf_ftype_v4sf_v2df;
23374 case V4SF_FTYPE_V4SF_DI:
23375 type = v4sf_ftype_v4sf_int64;
23377 case V4SF_FTYPE_V4SF_SI:
23378 type = v4sf_ftype_v4sf_int;
23380 case V2DI_FTYPE_V2DI_V2DI:
23381 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23382 type = v2di_ftype_v2di_v2di;
23384 case V2DI_FTYPE_V16QI_V16QI:
23385 type = v2di_ftype_v16qi_v16qi;
23387 case V2DI_FTYPE_V4SI_V4SI:
23388 type = v2di_ftype_v4si_v4si;
23390 case V2DI_FTYPE_V2DI_V16QI:
23391 type = v2di_ftype_v2di_v16qi;
23393 case V2DI_FTYPE_V2DF_V2DF:
23394 type = v2di_ftype_v2df_v2df;
23396 case V2DI_FTYPE_V2DI_SI_COUNT:
23397 type = v2di_ftype_v2di_int;
23399 case V2SI_FTYPE_V2SI_V2SI:
23400 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23401 type = v2si_ftype_v2si_v2si;
23403 case V2SI_FTYPE_V4HI_V4HI:
23404 type = v2si_ftype_v4hi_v4hi;
23406 case V2SI_FTYPE_V2SF_V2SF:
23407 type = v2si_ftype_v2sf_v2sf;
23409 case V2SI_FTYPE_V2SI_SI_COUNT:
23410 type = v2si_ftype_v2si_int;
23412 case V2DF_FTYPE_V2DF_V2DF:
23413 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23414 type = v2df_ftype_v2df_v2df;
23416 case V2DF_FTYPE_V2DF_V4SF:
23417 type = v2df_ftype_v2df_v4sf;
23419 case V2DF_FTYPE_V2DF_V2DI:
23420 type = v2df_ftype_v2df_v2di;
23422 case V2DF_FTYPE_V2DF_DI:
23423 type = v2df_ftype_v2df_int64;
23425 case V2DF_FTYPE_V2DF_SI:
23426 type = v2df_ftype_v2df_int;
23428 case V2SF_FTYPE_V2SF_V2SF:
23429 type = v2sf_ftype_v2sf_v2sf;
23431 case V1DI_FTYPE_V1DI_V1DI:
23432 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23433 type = v1di_ftype_v1di_v1di;
23435 case V1DI_FTYPE_V8QI_V8QI:
23436 type = v1di_ftype_v8qi_v8qi;
23438 case V1DI_FTYPE_V2SI_V2SI:
23439 type = v1di_ftype_v2si_v2si;
23441 case V1DI_FTYPE_V1DI_SI_COUNT:
23442 type = v1di_ftype_v1di_int;
23444 case UINT64_FTYPE_UINT64_UINT64:
23445 type = uint64_ftype_uint64_uint64;
23447 case UINT_FTYPE_UINT_UINT:
23448 type = unsigned_ftype_unsigned_unsigned;
23450 case UINT_FTYPE_UINT_USHORT:
23451 type = unsigned_ftype_unsigned_ushort;
23453 case UINT_FTYPE_UINT_UCHAR:
23454 type = unsigned_ftype_unsigned_uchar;
23456 case V8HI_FTYPE_V8HI_INT:
23457 type = v8hi_ftype_v8hi_int;
23459 case V8SF_FTYPE_V8SF_INT:
23460 type = v8sf_ftype_v8sf_int;
23462 case V4SI_FTYPE_V4SI_INT:
23463 type = v4si_ftype_v4si_int;
23465 case V4SI_FTYPE_V8SI_INT:
23466 type = v4si_ftype_v8si_int;
23468 case V4HI_FTYPE_V4HI_INT:
23469 type = v4hi_ftype_v4hi_int;
23471 case V4DF_FTYPE_V4DF_INT:
23472 type = v4df_ftype_v4df_int;
23474 case V4SF_FTYPE_V4SF_INT:
23475 type = v4sf_ftype_v4sf_int;
23477 case V4SF_FTYPE_V8SF_INT:
23478 type = v4sf_ftype_v8sf_int;
23480 case V2DI_FTYPE_V2DI_INT:
23481 case V2DI2TI_FTYPE_V2DI_INT:
23482 type = v2di_ftype_v2di_int;
23484 case V2DF_FTYPE_V2DF_INT:
23485 type = v2df_ftype_v2df_int;
23487 case V2DF_FTYPE_V4DF_INT:
23488 type = v2df_ftype_v4df_int;
23490 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23491 type = v16qi_ftype_v16qi_v16qi_v16qi;
23493 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23494 type = v8sf_ftype_v8sf_v8sf_v8sf;
23496 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23497 type = v4df_ftype_v4df_v4df_v4df;
23499 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23500 type = v4sf_ftype_v4sf_v4sf_v4sf;
23502 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23503 type = v2df_ftype_v2df_v2df_v2df;
23505 case V16QI_FTYPE_V16QI_V16QI_INT:
23506 type = v16qi_ftype_v16qi_v16qi_int;
23508 case V8SI_FTYPE_V8SI_V8SI_INT:
23509 type = v8si_ftype_v8si_v8si_int;
23511 case V8SI_FTYPE_V8SI_V4SI_INT:
23512 type = v8si_ftype_v8si_v4si_int;
23514 case V8HI_FTYPE_V8HI_V8HI_INT:
23515 type = v8hi_ftype_v8hi_v8hi_int;
23517 case V8SF_FTYPE_V8SF_V8SF_INT:
23518 type = v8sf_ftype_v8sf_v8sf_int;
23520 case V8SF_FTYPE_V8SF_V4SF_INT:
23521 type = v8sf_ftype_v8sf_v4sf_int;
23523 case V4SI_FTYPE_V4SI_V4SI_INT:
23524 type = v4si_ftype_v4si_v4si_int;
23526 case V4DF_FTYPE_V4DF_V4DF_INT:
23527 type = v4df_ftype_v4df_v4df_int;
23529 case V4DF_FTYPE_V4DF_V2DF_INT:
23530 type = v4df_ftype_v4df_v2df_int;
23532 case V4SF_FTYPE_V4SF_V4SF_INT:
23533 type = v4sf_ftype_v4sf_v4sf_int;
23535 case V2DI_FTYPE_V2DI_V2DI_INT:
23536 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23537 type = v2di_ftype_v2di_v2di_int;
23539 case V2DF_FTYPE_V2DF_V2DF_INT:
23540 type = v2df_ftype_v2df_v2df_int;
23542 case V2DI_FTYPE_V2DI_UINT_UINT:
23543 type = v2di_ftype_v2di_unsigned_unsigned;
23545 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23546 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23548 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23549 type = v1di_ftype_v1di_v1di_int;
23552 gcc_unreachable ();
23555 def_builtin_const (d->mask, d->name, type, d->code);
23558 /* pcmpestr[im] insns. */
23559 for (i = 0, d = bdesc_pcmpestr;
23560 i < ARRAY_SIZE (bdesc_pcmpestr);
23563 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23564 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23566 ftype = int_ftype_v16qi_int_v16qi_int_int;
23567 def_builtin_const (d->mask, d->name, ftype, d->code);
23570 /* pcmpistr[im] insns. */
23571 for (i = 0, d = bdesc_pcmpistr;
23572 i < ARRAY_SIZE (bdesc_pcmpistr);
23575 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23576 ftype = v16qi_ftype_v16qi_v16qi_int;
23578 ftype = int_ftype_v16qi_v16qi_int;
23579 def_builtin_const (d->mask, d->name, ftype, d->code);
23582 /* comi/ucomi insns. */
23583 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23584 if (d->mask == OPTION_MASK_ISA_SSE2)
23585 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23587 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23590 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23591 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23593 /* SSE or 3DNow!A */
23594 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23597 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23599 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23600 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23603 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23604 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23607 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23609 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23611 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23612 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23615 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23618 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23619 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23621 /* Access to the vec_init patterns. */
23622 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23623 integer_type_node, NULL_TREE);
23624 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23626 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23627 short_integer_type_node,
23628 short_integer_type_node,
23629 short_integer_type_node, NULL_TREE);
23630 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23632 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23633 char_type_node, char_type_node,
23634 char_type_node, char_type_node,
23635 char_type_node, char_type_node,
23636 char_type_node, NULL_TREE);
23637 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23639 /* Access to the vec_extract patterns. */
23640 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23641 integer_type_node, NULL_TREE);
23642 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23644 ftype = build_function_type_list (long_long_integer_type_node,
23645 V2DI_type_node, integer_type_node,
23647 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23649 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23650 integer_type_node, NULL_TREE);
23651 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23653 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23654 integer_type_node, NULL_TREE);
23655 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23657 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23658 integer_type_node, NULL_TREE);
23659 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23661 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23662 integer_type_node, NULL_TREE);
23663 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23665 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23666 integer_type_node, NULL_TREE);
23667 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23669 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23670 integer_type_node, NULL_TREE);
23671 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23673 /* Access to the vec_set patterns. */
23674 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23676 integer_type_node, NULL_TREE);
23677 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23679 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23681 integer_type_node, NULL_TREE);
23682 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23684 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23686 integer_type_node, NULL_TREE);
23687 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23689 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23691 integer_type_node, NULL_TREE);
23692 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23694 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23696 integer_type_node, NULL_TREE);
23697 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23699 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23701 integer_type_node, NULL_TREE);
23702 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23704 /* Add SSE5 multi-arg argument instructions */
23705 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23707 tree mtype = NULL_TREE;
23712 switch ((enum multi_arg_type)d->flag)
23714 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23715 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23716 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23717 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23718 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23719 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23720 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23721 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23722 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23723 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23724 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23725 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23726 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23727 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23728 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23729 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23730 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23731 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23732 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23733 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23734 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23735 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23736 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23737 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23738 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23739 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23740 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23741 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23742 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23743 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23744 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23745 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23746 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23747 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23748 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23749 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23750 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23751 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23752 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23753 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23754 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23755 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23756 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23757 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23758 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23759 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23760 case MULTI_ARG_UNKNOWN:
23762 gcc_unreachable ();
23766 def_builtin_const (d->mask, d->name, mtype, d->code);
23770 /* Internal method for ix86_init_builtins. */
/* Registers the cross-ABI va_list builtins (__builtin_ms_va_* and
   __builtin_sysv_va_*) so that 64-bit code can manipulate the va_list of
   the "other" calling convention.  Each builtin is tagged with the
   matching "ms_abi" / "sysv_abi" attribute list so the middle end expands
   it with the right ABI.
   NOTE(review): this dump is missing lines (non-contiguous inner
   numbering) — the `static void` specifier, opening/closing braces, and
   some statements are absent here; comments describe only what is
   visible.  */
23773 ix86_init_builtins_va_builtins_abi (void)
23775 tree ms_va_ref, sysv_va_ref;
23776 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23777 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23778 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23779 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23783 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23784 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* ms_va_list is passed by reference; sysv_va_list is accessed through a
   pointer to its element type (it is an array type).  */
23785 ms_va_ref = build_reference_type (ms_va_list_type_node);
23787 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23790 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23791 fnvoid_va_start_ms =
/* va_start variants are varargs function types; va_end/va_copy are not.  */
23792 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23793 fnvoid_va_end_sysv =
23794 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23795 fnvoid_va_start_sysv =
23796 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23798 fnvoid_va_copy_ms =
23799 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23801 fnvoid_va_copy_sysv =
23802 build_function_type_list (void_type_node, sysv_va_ref,
23803 sysv_va_ref, NULL_TREE);
/* Register all six builtins under the generic BUILT_IN_VA_* codes with the
   per-ABI attribute lists built above.  */
23805 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23806 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23807 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23808 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23809 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23810 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23811 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23812 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23813 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23814 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23815 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23816 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level entry point for registering all i386 builtins: the __float80
   and __float128 types, the TFmode math builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), then the
   MMX/SSE builtins and the cross-ABI va_list builtins.
   NOTE(review): this dump is missing lines (non-contiguous inner
   numbering) — declarations of `ftype`/`decl`, string arguments to some
   register_builtin_type calls, and braces are absent; comments describe
   only what is visible.  */
23820 ix86_init_builtins (void)
23822 tree float128_type_node = make_node (REAL_TYPE);
23825 /* The __float80 type. */
/* When long double is already the 80-bit XFmode type, reuse it as
   __float80 rather than building a fresh 80-bit REAL_TYPE.  */
23826 if (TYPE_MODE (long_double_type_node) == XFmode)
23827 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23831 /* The __float80 type. */
23832 tree float80_type_node = make_node (REAL_TYPE);
23834 TYPE_PRECISION (float80_type_node) = 80;
23835 layout_type (float80_type_node);
23836 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23840 /* The __float128 type. */
23841 TYPE_PRECISION (float128_type_node) = 128;
23842 layout_type (float128_type_node);
23843 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23846 /* TFmode support builtins. */
23847 ftype = build_function_type (float128_type_node, void_list_node);
23848 decl = add_builtin_function ("__builtin_infq", ftype,
23849 IX86_BUILTIN_INFQ, BUILT_IN_MD,
/* Remember each decl in the ix86_builtins table so the expander can find
   it later by IX86_BUILTIN_* code.  */
23851 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23853 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23854 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23856 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23858 /* We will expand them to normal call if SSE2 isn't available since
23859 they are used by libgcc. */
/* Hence the libgcc fallback names "__fabstf2" / "__copysigntf3" given as
   the builtins' library names below.  */
23860 ftype = build_function_type_list (float128_type_node,
23861 float128_type_node,
23863 decl = add_builtin_function ("__builtin_fabsq", ftype,
23864 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23865 "__fabstf2", NULL_TREE);
23866 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Both are pure value computations, so mark them readonly (const).  */
23867 TREE_READONLY (decl) = 1;
23869 ftype = build_function_type_list (float128_type_node,
23870 float128_type_node,
23871 float128_type_node,
23873 decl = add_builtin_function ("__builtin_copysignq", ftype,
23874 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23875 "__copysigntf3", NULL_TREE);
23876 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23877 TREE_READONLY (decl) = 1;
23879 ix86_init_mmx_sse_builtins ();
23881 ix86_init_builtins_va_builtins_abi ();
23884 /* Errors in the source file can cause expand_expr to return const0_rtx
23885 where we expect a vector. To avoid crashing, use one of the vector
23886 clear instructions. */
/* Maps a scalar const0_rtx to the zero constant of the requested vector
   MODE; any other X is left untouched.
   NOTE(review): the trailing `return x;` and braces are missing from this
   dump (non-contiguous inner numbering).  */
23888 safe_vector_operand (rtx x, enum machine_mode mode)
23890 if (x == const0_rtx)
23891 x = CONST0_RTX (mode);
23895 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin EXP through instruction pattern ICODE,
   returning the result in TARGET (or a fresh register of the pattern's
   output mode).  Operands that fail the pattern's predicates are copied
   into registers first.
   NOTE(review): dump is missing lines (declaration of `pat`, the final
   emit/return, braces); comments describe only what is visible.  */
23898 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23901 tree arg0 = CALL_EXPR_ARG (exp, 0);
23902 tree arg1 = CALL_EXPR_ARG (exp, 1);
23903 rtx op0 = expand_normal (arg0);
23904 rtx op1 = expand_normal (arg1);
/* Modes required by the insn pattern: operand 0 is the output.  */
23905 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23906 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23907 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx standing in for a vector.  */
23909 if (VECTOR_MODE_P (mode0))
23910 op0 = safe_vector_operand (op0, mode0);
23911 if (VECTOR_MODE_P (mode1))
23912 op1 = safe_vector_operand (op1, mode1);
23914 if (optimize || !target
23915 || GET_MODE (target) != tmode
23916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23917 target = gen_reg_rtx (tmode);
/* An SImode count destined for a TImode pattern operand is widened via an
   SSE2 scalar load into V4SI, then viewed as TImode.  */
23919 if (GET_MODE (op1) == SImode && mode1 == TImode)
23921 rtx x = gen_reg_rtx (V4SImode);
23922 emit_insn (gen_sse2_loadd (x, op1));
23923 op1 = gen_lowpart (TImode, x);
23926 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23927 op0 = copy_to_mode_reg (mode0, op0);
23928 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23929 op1 = copy_to_mode_reg (mode1, op1);
23931 pat = GEN_FCN (icode) (target, op0, op1);
23940 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands a multi-argument (SSE5/XOP-style) builtin.  M_TYPE selects the
   argument count/shape; SUB_CODE carries the comparison code for *_CMP
   variants, which is passed to the pattern as an extra rtx operand.
   NOTE(review): dump is missing lines (the `nargs`/`args` declarations,
   `switch` headers, `break`s, final emit/return, braces); comments
   describe only what is visible.  */
23943 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23944 enum multi_arg_type m_type,
23945 enum rtx_code sub_code)
23950 bool comparison_p = false;
23952 bool last_arg_constant = false;
23953 int num_memory = 0;
23956 enum machine_mode mode;
23959 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
23963 case MULTI_ARG_3_SF:
23964 case MULTI_ARG_3_DF:
23965 case MULTI_ARG_3_DI:
23966 case MULTI_ARG_3_SI:
23967 case MULTI_ARG_3_SI_DI:
23968 case MULTI_ARG_3_HI:
23969 case MULTI_ARG_3_HI_SI:
23970 case MULTI_ARG_3_QI:
23971 case MULTI_ARG_3_PERMPS:
23972 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
23976 case MULTI_ARG_2_SF:
23977 case MULTI_ARG_2_DF:
23978 case MULTI_ARG_2_DI:
23979 case MULTI_ARG_2_SI:
23980 case MULTI_ARG_2_HI:
23981 case MULTI_ARG_2_QI:
/* Two operands where the last must be an immediate.  */
23985 case MULTI_ARG_2_DI_IMM:
23986 case MULTI_ARG_2_SI_IMM:
23987 case MULTI_ARG_2_HI_IMM:
23988 case MULTI_ARG_2_QI_IMM:
23990 last_arg_constant = true;
/* One-operand forms, including widening/narrowing conversions.  */
23993 case MULTI_ARG_1_SF:
23994 case MULTI_ARG_1_DF:
23995 case MULTI_ARG_1_DI:
23996 case MULTI_ARG_1_SI:
23997 case MULTI_ARG_1_HI:
23998 case MULTI_ARG_1_QI:
23999 case MULTI_ARG_1_SI_DI:
24000 case MULTI_ARG_1_HI_DI:
24001 case MULTI_ARG_1_HI_SI:
24002 case MULTI_ARG_1_QI_DI:
24003 case MULTI_ARG_1_QI_SI:
24004 case MULTI_ARG_1_QI_HI:
24005 case MULTI_ARG_1_PH2PS:
24006 case MULTI_ARG_1_PS2PH:
/* Comparison forms: SUB_CODE is materialized as a comparison rtx.  */
24010 case MULTI_ARG_2_SF_CMP:
24011 case MULTI_ARG_2_DF_CMP:
24012 case MULTI_ARG_2_DI_CMP:
24013 case MULTI_ARG_2_SI_CMP:
24014 case MULTI_ARG_2_HI_CMP:
24015 case MULTI_ARG_2_QI_CMP:
24017 comparison_p = true;
/* "TF" (test-false style) forms: SUB_CODE passed as an integer.  */
24020 case MULTI_ARG_2_SF_TF:
24021 case MULTI_ARG_2_DF_TF:
24022 case MULTI_ARG_2_DI_TF:
24023 case MULTI_ARG_2_SI_TF:
24024 case MULTI_ARG_2_HI_TF:
24025 case MULTI_ARG_2_QI_TF:
24030 case MULTI_ARG_UNKNOWN:
24032 gcc_unreachable ();
24035 if (optimize || !target
24036 || GET_MODE (target) != tmode
24037 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24038 target = gen_reg_rtx (tmode);
24040 gcc_assert (nargs <= 4);
24042 for (i = 0; i < nargs; i++)
24044 tree arg = CALL_EXPR_ARG (exp, i);
24045 rtx op = expand_normal (arg);
/* For comparison patterns the comparison rtx occupies operand 1, so the
   argument operands shift up by one.  */
24046 int adjust = (comparison_p) ? 1 : 0;
24047 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24049 if (last_arg_constant && i == nargs-1)
24051 if (GET_CODE (op) != CONST_INT)
24053 error ("last argument must be an immediate");
/* Error recovery: return a dummy register rather than crashing.  */
24054 return gen_reg_rtx (tmode);
24059 if (VECTOR_MODE_P (mode))
24060 op = safe_vector_operand (op, mode);
24062 /* If we aren't optimizing, only allow one memory operand to be
24064 if (memory_operand (op, mode))
24067 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24070 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24072 op = force_reg (mode, op);
24076 args[i].mode = mode;
/* Emit the pattern according to the argument count and shape.  */
24082 pat = GEN_FCN (icode) (target, args[0].op);
24087 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24088 GEN_INT ((int)sub_code));
24089 else if (! comparison_p)
24090 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24093 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24097 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24102 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24106 gcc_unreachable ();
24116 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24117 insns with vec_merge. */
/* Expands a one-argument builtin whose pattern is a vec_merge scalar op:
   the single source is used for both pattern inputs (op0 computed, op1
   merged).
   NOTE(review): dump is missing lines (the `rtx target` parameter line,
   the `op1 = op0;` assignment, final emit/return, braces); comments
   describe only what is visible.  */
24120 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24124 tree arg0 = CALL_EXPR_ARG (exp, 0);
24125 rtx op1, op0 = expand_normal (arg0);
24126 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24127 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24129 if (optimize || !target
24130 || GET_MODE (target) != tmode
24131 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24132 target = gen_reg_rtx (tmode);
24134 if (VECTOR_MODE_P (mode0))
24135 op0 = safe_vector_operand (op0, mode0);
24137 if ((optimize && !register_operand (op0, mode0))
24138 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24139 op0 = copy_to_mode_reg (mode0, op0);
/* op1 shares mode0 with op0 — both pattern inputs have the same mode.  */
24142 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24143 op1 = copy_to_mode_reg (mode0, op1);
24145 pat = GEN_FCN (icode) (target, op0, op1);
24152 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE vector comparison builtin described by D.  When SWAP is
   set the operands are exchanged (for comparisons only available in the
   reversed form); the comparison rtx D->comparison is passed as the
   pattern's final operand.
   NOTE(review): dump is missing lines (declarations of `pat`/`op2`, the
   swap's completing moves, final emit/return, braces); comments describe
   only what is visible.  */
24155 ix86_expand_sse_compare (const struct builtin_description *d,
24156 tree exp, rtx target, bool swap)
24159 tree arg0 = CALL_EXPR_ARG (exp, 0);
24160 tree arg1 = CALL_EXPR_ARG (exp, 1);
24161 rtx op0 = expand_normal (arg0);
24162 rtx op1 = expand_normal (arg1);
24164 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24165 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24166 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24167 enum rtx_code comparison = d->comparison;
24169 if (VECTOR_MODE_P (mode0))
24170 op0 = safe_vector_operand (op0, mode0);
24171 if (VECTOR_MODE_P (mode1))
24172 op1 = safe_vector_operand (op1, mode1);
24174 /* Swap operands if we have a comparison that isn't available in
/* (continuation of the original comment — "hardware form" — lost in the
   dump; the copy into TMP begins the operand exchange.)  */
24178 rtx tmp = gen_reg_rtx (mode1);
24179 emit_move_insn (tmp, op1);
24184 if (optimize || !target
24185 || GET_MODE (target) != tmode
24186 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24187 target = gen_reg_rtx (tmode);
24189 if ((optimize && !register_operand (op0, mode0))
24190 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24191 op0 = copy_to_mode_reg (mode0, op0);
24192 if ((optimize && !register_operand (op1, mode1))
24193 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24194 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is operand 3 of the pattern.  */
24196 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24197 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24204 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a COMI/UCOMI scalar-compare builtin: emits the compare, then
   materializes the flag result into the low byte of an SImode register
   via a STRICT_LOW_PART set, and returns that SImode register.
   NOTE(review): dump is missing lines (the `rtx target` parameter, the
   body of the operand swap, final emit details, braces); comments
   describe only what is visible.  */
24207 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24211 tree arg0 = CALL_EXPR_ARG (exp, 0);
24212 tree arg1 = CALL_EXPR_ARG (exp, 1);
24213 rtx op0 = expand_normal (arg0);
24214 rtx op1 = expand_normal (arg1);
24215 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24216 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24217 enum rtx_code comparison = d->comparison;
24219 if (VECTOR_MODE_P (mode0))
24220 op0 = safe_vector_operand (op0, mode0);
24221 if (VECTOR_MODE_P (mode1))
24222 op1 = safe_vector_operand (op1, mode1);
24224 /* Swap operands if we have a comparison that isn't available in
24226 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zeroed SImode, then written through its QImode low
   part so the upper bytes stay zero.  */
24233 target = gen_reg_rtx (SImode);
24234 emit_move_insn (target, const0_rtx);
24235 target = gen_rtx_SUBREG (QImode, target, 0);
24237 if ((optimize && !register_operand (op0, mode0))
24238 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24239 op0 = copy_to_mode_reg (mode0, op0);
24240 if ((optimize && !register_operand (op1, mode1))
24241 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24242 op1 = copy_to_mode_reg (mode1, op1);
24244 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags according to D->comparison.  */
24248 emit_insn (gen_rtx_SET (VOIDmode,
24249 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24250 gen_rtx_fmt_ee (comparison, QImode,
24254 return SUBREG_REG (target);
24257 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a PTEST-style builtin: emits the test insn, then converts the
   flags result (per D->comparison) into a 0/1 value in an SImode
   register, mirroring the COMI expansion above.
   NOTE(review): dump is missing lines (the `rtx target` parameter,
   `if (! pat) return 0;`-style checks, braces); comments describe only
   what is visible.  */
24260 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24264 tree arg0 = CALL_EXPR_ARG (exp, 0);
24265 tree arg1 = CALL_EXPR_ARG (exp, 1);
24266 rtx op0 = expand_normal (arg0);
24267 rtx op1 = expand_normal (arg1);
24268 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24269 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24270 enum rtx_code comparison = d->comparison;
24272 if (VECTOR_MODE_P (mode0))
24273 op0 = safe_vector_operand (op0, mode0);
24274 if (VECTOR_MODE_P (mode1))
24275 op1 = safe_vector_operand (op1, mode1);
/* Zeroed SImode result, written through its QImode low part.  */
24277 target = gen_reg_rtx (SImode);
24278 emit_move_insn (target, const0_rtx);
24279 target = gen_rtx_SUBREG (QImode, target, 0);
24281 if ((optimize && !register_operand (op0, mode0))
24282 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24283 op0 = copy_to_mode_reg (mode0, op0);
24284 if ((optimize && !register_operand (op1, mode1))
24285 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24286 op1 = copy_to_mode_reg (mode1, op1);
24288 pat = GEN_FCN (d->icode) (op0, op1);
24292 emit_insn (gen_rtx_SET (VOIDmode,
24293 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24294 gen_rtx_fmt_ee (comparison, QImode,
24298 return SUBREG_REG (target);
24301 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the SSE4.2 PCMPESTRI/PCMPESTRM builtins (5 arguments: two
   vectors, two explicit lengths, and an 8-bit immediate mode).  The
   ...I128 variant returns the index result, the ...M128 variant the mask
   result; all other variants extract a single flag bit (selected by
   D->flag, which names a flags-register) into an SImode 0/1 value.
   NOTE(review): dump is missing lines (pattern-failure checks, the
   flag-extraction branch header, braces); comments describe only what is
   visible.  Also: the error message "a 8-bit" should read "an 8-bit" —
   string text deliberately left untouched here.  */
24304 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24305 tree exp, rtx target)
24308 tree arg0 = CALL_EXPR_ARG (exp, 0);
24309 tree arg1 = CALL_EXPR_ARG (exp, 1);
24310 tree arg2 = CALL_EXPR_ARG (exp, 2);
24311 tree arg3 = CALL_EXPR_ARG (exp, 3);
24312 tree arg4 = CALL_EXPR_ARG (exp, 4);
24313 rtx scratch0, scratch1;
24314 rtx op0 = expand_normal (arg0);
24315 rtx op1 = expand_normal (arg1);
24316 rtx op2 = expand_normal (arg2);
24317 rtx op3 = expand_normal (arg3);
24318 rtx op4 = expand_normal (arg4);
24319 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* The pattern has two outputs (index and mask) plus five inputs.  */
24321 tmode0 = insn_data[d->icode].operand[0].mode;
24322 tmode1 = insn_data[d->icode].operand[1].mode;
24323 modev2 = insn_data[d->icode].operand[2].mode;
24324 modei3 = insn_data[d->icode].operand[3].mode;
24325 modev4 = insn_data[d->icode].operand[4].mode;
24326 modei5 = insn_data[d->icode].operand[5].mode;
24327 modeimm = insn_data[d->icode].operand[6].mode;
24329 if (VECTOR_MODE_P (modev2))
24330 op0 = safe_vector_operand (op0, modev2);
24331 if (VECTOR_MODE_P (modev4))
24332 op2 = safe_vector_operand (op2, modev4);
24334 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24335 op0 = copy_to_mode_reg (modev2, op0);
24336 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24337 op1 = copy_to_mode_reg (modei3, op1);
24338 if ((optimize && !register_operand (op2, modev4))
24339 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24340 op2 = copy_to_mode_reg (modev4, op2);
24341 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24342 op3 = copy_to_mode_reg (modei5, op3);
24344 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24346 error ("the fifth argument must be a 8-bit immediate")；
24350 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24352 if (optimize || !target
24353 || GET_MODE (target) != tmode0
24354 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24355 target = gen_reg_rtx (tmode0);
/* The unused mask output goes into a scratch register.  */
24357 scratch1 = gen_reg_rtx (tmode1);
24359 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24361 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24363 if (optimize || !target
24364 || GET_MODE (target) != tmode1
24365 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24366 target = gen_reg_rtx (tmode1);
24368 scratch0 = gen_reg_rtx (tmode0);
24370 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-returning variant: both real outputs are scratch; the result is a
   bit read from the flags register named by D->flag.  */
24374 gcc_assert (d->flag);
24376 scratch0 = gen_reg_rtx (tmode0);
24377 scratch1 = gen_reg_rtx (tmode1);
24379 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24389 target = gen_reg_rtx (SImode);
24390 emit_move_insn (target, const0_rtx);
24391 target = gen_rtx_SUBREG (QImode, target, 0);
24394 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24395 gen_rtx_fmt_ee (EQ, QImode,
24396 gen_rtx_REG ((enum machine_mode) d->flag,
24399 return SUBREG_REG (target);
24406 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the SSE4.2 PCMPISTRI/PCMPISTRM builtins (3 arguments: two
   vectors and an 8-bit immediate mode) — the implicit-length siblings of
   the pcmpestr expansion above, with the same three result variants
   (index, mask, or a flag bit selected by D->flag).
   NOTE(review): dump is missing lines (pattern-failure checks, the
   flag-extraction branch header, braces); comments describe only what is
   visible.  Also: the error message "a 8-bit" should read "an 8-bit" —
   string text deliberately left untouched here.  */
24409 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24410 tree exp, rtx target)
24413 tree arg0 = CALL_EXPR_ARG (exp, 0);
24414 tree arg1 = CALL_EXPR_ARG (exp, 1);
24415 tree arg2 = CALL_EXPR_ARG (exp, 2);
24416 rtx scratch0, scratch1;
24417 rtx op0 = expand_normal (arg0);
24418 rtx op1 = expand_normal (arg1);
24419 rtx op2 = expand_normal (arg2);
24420 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24422 tmode0 = insn_data[d->icode].operand[0].mode;
24423 tmode1 = insn_data[d->icode].operand[1].mode;
24424 modev2 = insn_data[d->icode].operand[2].mode;
24425 modev3 = insn_data[d->icode].operand[3].mode;
24426 modeimm = insn_data[d->icode].operand[4].mode;
24428 if (VECTOR_MODE_P (modev2))
24429 op0 = safe_vector_operand (op0, modev2);
24430 if (VECTOR_MODE_P (modev3))
24431 op1 = safe_vector_operand (op1, modev3);
24433 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24434 op0 = copy_to_mode_reg (modev2, op0);
24435 if ((optimize && !register_operand (op1, modev3))
24436 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24437 op1 = copy_to_mode_reg (modev3, op1);
24439 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24441 error ("the third argument must be a 8-bit immediate");
24445 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24447 if (optimize || !target
24448 || GET_MODE (target) != tmode0
24449 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24450 target = gen_reg_rtx (tmode0);
/* The unused mask output goes into a scratch register.  */
24452 scratch1 = gen_reg_rtx (tmode1);
24454 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24456 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24458 if (optimize || !target
24459 || GET_MODE (target) != tmode1
24460 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24461 target = gen_reg_rtx (tmode1);
24463 scratch0 = gen_reg_rtx (tmode0);
24465 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-returning variant: both outputs scratch, result read from the
   flags register named by D->flag.  */
24469 gcc_assert (d->flag);
24471 scratch0 = gen_reg_rtx (tmode0);
24472 scratch1 = gen_reg_rtx (tmode1);
24474 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24484 target = gen_reg_rtx (SImode);
24485 emit_move_insn (target, const0_rtx);
24486 target = gen_rtx_SUBREG (QImode, target, 0);
24489 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24490 gen_rtx_fmt_ee (EQ, QImode,
24491 gen_rtx_REG ((enum machine_mode) d->flag,
24494 return SUBREG_REG (target);
24500 /* Subroutine of ix86_expand_builtin to take care of insns with
24501 variable number of operands. */
/* General driver for builtins described by a builtin_description D whose
   D->flag encodes an ix86_builtin_type.  Dispatches PTEST / vec_merge /
   comparison shapes to the dedicated expanders above, then handles the
   common 1-4 operand cases: trailing count operands, trailing immediate
   operands (with per-pattern immediate-width diagnostics), and memory
   operand limits.
   NOTE(review): dump is missing many lines (the `args` array declaration,
   `break`s, several switch headers, `nargs = N` assignments, final
   emit/return, braces); comments describe only what is visible.  */
24504 ix86_expand_args_builtin (const struct builtin_description *d,
24505 tree exp, rtx target)
24507 rtx pat, real_target;
24508 unsigned int i, nargs;
24509 unsigned int nargs_constant = 0;
24510 int num_memory = 0;
24514 enum machine_mode mode;
24516 bool last_arg_count = false;
24517 enum insn_code icode = d->icode;
24518 const struct insn_data *insn_p = &insn_data[icode];
24519 enum machine_mode tmode = insn_p->operand[0].mode;
/* RMODE, when set, is a result mode distinct from the pattern's output
   mode; see the subreg handling after the switch.  */
24520 enum machine_mode rmode = VOIDmode;
24522 enum rtx_code comparison = d->comparison;
24524 switch ((enum ix86_builtin_type) d->flag)
/* PTEST shapes delegate entirely.  */
24526 case INT_FTYPE_V8SF_V8SF_PTEST:
24527 case INT_FTYPE_V4DI_V4DI_PTEST:
24528 case INT_FTYPE_V4DF_V4DF_PTEST:
24529 case INT_FTYPE_V4SF_V4SF_PTEST:
24530 case INT_FTYPE_V2DI_V2DI_PTEST:
24531 case INT_FTYPE_V2DF_V2DF_PTEST:
24532 return ix86_expand_sse_ptest (d, exp, target);
/* Unary shapes.  */
24533 case FLOAT128_FTYPE_FLOAT128:
24534 case FLOAT_FTYPE_FLOAT:
24535 case INT64_FTYPE_V4SF:
24536 case INT64_FTYPE_V2DF:
24537 case INT_FTYPE_V16QI:
24538 case INT_FTYPE_V8QI:
24539 case INT_FTYPE_V8SF:
24540 case INT_FTYPE_V4DF:
24541 case INT_FTYPE_V4SF:
24542 case INT_FTYPE_V2DF:
24543 case V16QI_FTYPE_V16QI:
24544 case V8SI_FTYPE_V8SF:
24545 case V8SI_FTYPE_V4SI:
24546 case V8HI_FTYPE_V8HI:
24547 case V8HI_FTYPE_V16QI:
24548 case V8QI_FTYPE_V8QI:
24549 case V8SF_FTYPE_V8SF:
24550 case V8SF_FTYPE_V8SI:
24551 case V8SF_FTYPE_V4SF:
24552 case V4SI_FTYPE_V4SI:
24553 case V4SI_FTYPE_V16QI:
24554 case V4SI_FTYPE_V4SF:
24555 case V4SI_FTYPE_V8SI:
24556 case V4SI_FTYPE_V8HI:
24557 case V4SI_FTYPE_V4DF:
24558 case V4SI_FTYPE_V2DF:
24559 case V4HI_FTYPE_V4HI:
24560 case V4DF_FTYPE_V4DF:
24561 case V4DF_FTYPE_V4SI:
24562 case V4DF_FTYPE_V4SF:
24563 case V4DF_FTYPE_V2DF:
24564 case V4SF_FTYPE_V4SF:
24565 case V4SF_FTYPE_V4SI:
24566 case V4SF_FTYPE_V8SF:
24567 case V4SF_FTYPE_V4DF:
24568 case V4SF_FTYPE_V2DF:
24569 case V2DI_FTYPE_V2DI:
24570 case V2DI_FTYPE_V16QI:
24571 case V2DI_FTYPE_V8HI:
24572 case V2DI_FTYPE_V4SI:
24573 case V2DF_FTYPE_V2DF:
24574 case V2DF_FTYPE_V4SI:
24575 case V2DF_FTYPE_V4DF:
24576 case V2DF_FTYPE_V4SF:
24577 case V2DF_FTYPE_V2SI:
24578 case V2SI_FTYPE_V2SI:
24579 case V2SI_FTYPE_V4SF:
24580 case V2SI_FTYPE_V2SF:
24581 case V2SI_FTYPE_V2DF:
24582 case V2SF_FTYPE_V2SF:
24583 case V2SF_FTYPE_V2SI:
/* Scalar unop with vec_merge delegates.  */
24586 case V4SF_FTYPE_V4SF_VEC_MERGE:
24587 case V2DF_FTYPE_V2DF_VEC_MERGE:
24588 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary shapes.  */
24589 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24590 case V16QI_FTYPE_V16QI_V16QI:
24591 case V16QI_FTYPE_V8HI_V8HI:
24592 case V8QI_FTYPE_V8QI_V8QI:
24593 case V8QI_FTYPE_V4HI_V4HI:
24594 case V8HI_FTYPE_V8HI_V8HI:
24595 case V8HI_FTYPE_V16QI_V16QI:
24596 case V8HI_FTYPE_V4SI_V4SI:
24597 case V8SF_FTYPE_V8SF_V8SF:
24598 case V8SF_FTYPE_V8SF_V8SI:
24599 case V4SI_FTYPE_V4SI_V4SI:
24600 case V4SI_FTYPE_V8HI_V8HI:
24601 case V4SI_FTYPE_V4SF_V4SF:
24602 case V4SI_FTYPE_V2DF_V2DF:
24603 case V4HI_FTYPE_V4HI_V4HI:
24604 case V4HI_FTYPE_V8QI_V8QI:
24605 case V4HI_FTYPE_V2SI_V2SI:
24606 case V4DF_FTYPE_V4DF_V4DF:
24607 case V4DF_FTYPE_V4DF_V4DI:
24608 case V4SF_FTYPE_V4SF_V4SF:
24609 case V4SF_FTYPE_V4SF_V4SI:
24610 case V4SF_FTYPE_V4SF_V2SI:
24611 case V4SF_FTYPE_V4SF_V2DF:
24612 case V4SF_FTYPE_V4SF_DI:
24613 case V4SF_FTYPE_V4SF_SI:
24614 case V2DI_FTYPE_V2DI_V2DI:
24615 case V2DI_FTYPE_V16QI_V16QI:
24616 case V2DI_FTYPE_V4SI_V4SI:
24617 case V2DI_FTYPE_V2DI_V16QI:
24618 case V2DI_FTYPE_V2DF_V2DF:
24619 case V2SI_FTYPE_V2SI_V2SI:
24620 case V2SI_FTYPE_V4HI_V4HI:
24621 case V2SI_FTYPE_V2SF_V2SF:
24622 case V2DF_FTYPE_V2DF_V2DF:
24623 case V2DF_FTYPE_V2DF_V4SF:
24624 case V2DF_FTYPE_V2DF_V2DI:
24625 case V2DF_FTYPE_V2DF_DI:
24626 case V2DF_FTYPE_V2DF_SI:
24627 case V2SF_FTYPE_V2SF_V2SF:
24628 case V1DI_FTYPE_V1DI_V1DI:
24629 case V1DI_FTYPE_V8QI_V8QI:
24630 case V1DI_FTYPE_V2SI_V2SI:
/* Non-comparison binops take the fast path.  */
24631 if (comparison == UNKNOWN)
24632 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparison with swapped operands.  */
24635 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24636 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24637 gcc_assert (comparison != UNKNOWN);
/* Shift shapes: the last operand is a count (register or immediate).  */
24641 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24642 case V8HI_FTYPE_V8HI_SI_COUNT:
24643 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24644 case V4SI_FTYPE_V4SI_SI_COUNT:
24645 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24646 case V4HI_FTYPE_V4HI_SI_COUNT:
24647 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24648 case V2DI_FTYPE_V2DI_SI_COUNT:
24649 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24650 case V2SI_FTYPE_V2SI_SI_COUNT:
24651 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24652 case V1DI_FTYPE_V1DI_SI_COUNT:
24654 last_arg_count = true;
24656 case UINT64_FTYPE_UINT64_UINT64:
24657 case UINT_FTYPE_UINT_UINT:
24658 case UINT_FTYPE_UINT_USHORT:
24659 case UINT_FTYPE_UINT_UCHAR:
/* "2TI" shapes produce the result through a TImode subreg (rmode).  */
24662 case V2DI2TI_FTYPE_V2DI_INT:
24665 nargs_constant = 1;
24667 case V8HI_FTYPE_V8HI_INT:
24668 case V8SF_FTYPE_V8SF_INT:
24669 case V4SI_FTYPE_V4SI_INT:
24670 case V4SI_FTYPE_V8SI_INT:
24671 case V4HI_FTYPE_V4HI_INT:
24672 case V4DF_FTYPE_V4DF_INT:
24673 case V4SF_FTYPE_V4SF_INT:
24674 case V4SF_FTYPE_V8SF_INT:
24675 case V2DI_FTYPE_V2DI_INT:
24676 case V2DF_FTYPE_V2DF_INT:
24677 case V2DF_FTYPE_V4DF_INT:
24679 nargs_constant = 1;
/* Ternary shapes.  */
24681 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24682 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24683 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24684 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24685 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24688 case V16QI_FTYPE_V16QI_V16QI_INT:
24689 case V8HI_FTYPE_V8HI_V8HI_INT:
24690 case V8SI_FTYPE_V8SI_V8SI_INT:
24691 case V8SI_FTYPE_V8SI_V4SI_INT:
24692 case V8SF_FTYPE_V8SF_V8SF_INT:
24693 case V8SF_FTYPE_V8SF_V4SF_INT:
24694 case V4SI_FTYPE_V4SI_V4SI_INT:
24695 case V4DF_FTYPE_V4DF_V4DF_INT:
24696 case V4DF_FTYPE_V4DF_V2DF_INT:
24697 case V4SF_FTYPE_V4SF_V4SF_INT:
24698 case V2DI_FTYPE_V2DI_V2DI_INT:
24699 case V2DF_FTYPE_V2DF_V2DF_INT:
24701 nargs_constant = 1;
24703 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24706 nargs_constant = 1;
24708 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24711 nargs_constant = 1;
24713 case V2DI_FTYPE_V2DI_UINT_UINT:
24715 nargs_constant = 2;
24717 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24719 nargs_constant = 2;
24722 gcc_unreachable ();
24725 gcc_assert (nargs <= ARRAY_SIZE (args));
/* All comparison shapes funnel through the SSE compare expander.  */
24727 if (comparison != UNKNOWN)
24729 gcc_assert (nargs == 2);
24730 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the pattern's output mode differs from the builtin's result mode,
   compute into an RMODE register and hand the pattern a TMODE subreg.  */
24733 if (rmode == VOIDmode || rmode == tmode)
24737 || GET_MODE (target) != tmode
24738 || ! (*insn_p->operand[0].predicate) (target, tmode))
24739 target = gen_reg_rtx (tmode);
24740 real_target = target;
24744 target = gen_reg_rtx (rmode)；
24745 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24748 for (i = 0; i < nargs; i++)
24750 tree arg = CALL_EXPR_ARG (exp, i);
24751 rtx op = expand_normal (arg);
24752 enum machine_mode mode = insn_p->operand[i + 1].mode;
24753 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24755 if (last_arg_count && (i + 1) == nargs)
24757 /* SIMD shift insns take either an 8-bit immediate or
24758 register as count. But builtin functions take int as
24759 count. If count doesn't match, we put it in register. */
24762 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24763 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24764 op = copy_to_reg (op);
/* Trailing immediate operand(s): must be CONST_INT; the diagnostics
   below are keyed by insn pattern to the immediate's bit-width.  */
24767 else if ((nargs - i) <= nargs_constant)
24772 case CODE_FOR_sse4_1_roundpd:
24773 case CODE_FOR_sse4_1_roundps:
24774 case CODE_FOR_sse4_1_roundsd:
24775 case CODE_FOR_sse4_1_roundss:
24776 case CODE_FOR_sse4_1_blendps:
24777 case CODE_FOR_avx_blendpd256:
24778 case CODE_FOR_avx_vpermilv4df:
24779 case CODE_FOR_avx_roundpd256:
24780 case CODE_FOR_avx_roundps256:
24781 error ("the last argument must be a 4-bit immediate");
24784 case CODE_FOR_sse4_1_blendpd:
24785 case CODE_FOR_avx_vpermilv2df:
24786 error ("the last argument must be a 2-bit immediate");
24789 case CODE_FOR_avx_vextractf128v4df:
24790 case CODE_FOR_avx_vextractf128v8sf:
24791 case CODE_FOR_avx_vextractf128v8si:
24792 case CODE_FOR_avx_vinsertf128v4df:
24793 case CODE_FOR_avx_vinsertf128v8sf:
24794 case CODE_FOR_avx_vinsertf128v8si:
24795 error ("the last argument must be a 1-bit immediate");
24798 case CODE_FOR_avx_cmpsdv2df3:
24799 case CODE_FOR_avx_cmpssv4sf3:
24800 case CODE_FOR_avx_cmppdv2df3:
24801 case CODE_FOR_avx_cmppsv4sf3:
24802 case CODE_FOR_avx_cmppdv4df3:
24803 case CODE_FOR_avx_cmppsv8sf3:
24804 error ("the last argument must be a 5-bit immediate");
/* Default: 8-bit immediate; message depends on position.  */
24808 switch (nargs_constant)
24811 if ((nargs - i) == nargs_constant)
24813 error ("the next to last argument must be an 8-bit immediate");
24817 error ("the last argument must be an 8-bit immediate");
24820 gcc_unreachable ();
24827 if (VECTOR_MODE_P (mode))
24828 op = safe_vector_operand (op, mode);
24830 /* If we aren't optimizing, only allow one memory operand to
24832 if (memory_operand (op, mode))
24835 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24837 if (optimize || !match || num_memory > 1)
24838 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: force into a register, then reinterpret via subreg.  */
24842 op = copy_to_reg (op);
24843 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24848 args[i].mode = mode;
/* Emit the pattern for 1-4 collected operands.  */
24854 pat = GEN_FCN (icode) (real_target, args[0].op);
24857 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24860 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24864 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24865 args[2].op, args[3].op);
24868 gcc_unreachable ();
24878 /* Subroutine of ix86_expand_builtin to take care of special insns
24879 with variable number of operands. */
/* Expands "special" builtins — loads, stores, and void insns — where one
   operand is a memory reference built from a pointer argument.  KLASS
   distinguishes load shapes (result in TARGET, memory among the inputs)
   from store shapes (TARGET is the destination memory).
   NOTE(review): dump is missing many lines (`nargs = N` assignments,
   `break`s, `klass = load/store` assignments, final emit/return, braces);
   comments describe only what is visible.  */
24882 ix86_expand_special_args_builtin (const struct builtin_description *d,
24883 tree exp, rtx target)
24887 unsigned int i, nargs, arg_adjust, memory;
24891 enum machine_mode mode;
24893 enum insn_code icode = d->icode;
24894 bool last_arg_constant = false;
24895 const struct insn_data *insn_p = &insn_data[icode];
24896 enum machine_mode tmode = insn_p->operand[0].mode;
24897 enum { load, store } klass;
24899 switch ((enum ix86_special_builtin_type) d->flag)
/* No-operand insns are emitted immediately.  */
24901 case VOID_FTYPE_VOID:
24902 emit_insn (GEN_FCN (icode) (target));
/* Pure loads: single pointer argument.  */
24904 case V2DI_FTYPE_PV2DI:
24905 case V32QI_FTYPE_PCCHAR:
24906 case V16QI_FTYPE_PCCHAR:
24907 case V8SF_FTYPE_PCV4SF:
24908 case V8SF_FTYPE_PCFLOAT:
24909 case V4SF_FTYPE_PCFLOAT:
24910 case V4DF_FTYPE_PCV2DF:
24911 case V4DF_FTYPE_PCDOUBLE:
24912 case V2DF_FTYPE_PCDOUBLE:
/* Pure stores: pointer destination plus value.  */
24917 case VOID_FTYPE_PV2SF_V4SF:
24918 case VOID_FTYPE_PV4DI_V4DI:
24919 case VOID_FTYPE_PV2DI_V2DI:
24920 case VOID_FTYPE_PCHAR_V32QI:
24921 case VOID_FTYPE_PCHAR_V16QI:
24922 case VOID_FTYPE_PFLOAT_V8SF:
24923 case VOID_FTYPE_PFLOAT_V4SF:
24924 case VOID_FTYPE_PDOUBLE_V4DF:
24925 case VOID_FTYPE_PDOUBLE_V2DF:
24926 case VOID_FTYPE_PDI_DI:
24927 case VOID_FTYPE_PINT_INT:
24930 /* Reserve memory operand for target. */
24931 memory = ARRAY_SIZE (args);
/* Loads combining a vector input with a memory input.  */
24933 case V4SF_FTYPE_V4SF_PCV2SF:
24934 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads (AVX maskload): memory first, mask second.  */
24939 case V8SF_FTYPE_PCV8SF_V8SF:
24940 case V4DF_FTYPE_PCV4DF_V4DF:
24941 case V4SF_FTYPE_PCV4SF_V4SF:
24942 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores (AVX maskstore).  */
24947 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24948 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24949 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24950 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24953 /* Reserve memory operand for target. */
24954 memory = ARRAY_SIZE (args);
24957 gcc_unreachable ();
24960 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, argument 0 is the destination pointer: wrap it in a MEM
   and use it as the pattern's output.  */
24962 if (klass == store)
24964 arg = CALL_EXPR_ARG (exp, 0);
24965 op = expand_normal (arg);
24966 gcc_assert (target == 0);
24967 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24975 || GET_MODE (target) != tmode
24976 || ! (*insn_p->operand[0].predicate) (target, tmode))
24977 target = gen_reg_rtx (tmode);
24980 for (i = 0; i < nargs; i++)
24982 enum machine_mode mode = insn_p->operand[i + 1].mode;
24985 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24986 op = expand_normal (arg);
24987 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24989 if (last_arg_constant && (i + 1) == nargs)
24995 error ("the last argument must be an 8-bit immediate");
25003 /* This must be the memory operand. */
25004 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25005 gcc_assert (GET_MODE (op) == mode
25006 || GET_MODE (op) == VOIDmode);
25010 /* This must be register. */
25011 if (VECTOR_MODE_P (mode))
25012 op = safe_vector_operand (op, mode);
25014 gcc_assert (GET_MODE (op) == mode
25015 || GET_MODE (op) == VOIDmode);
25016 op = copy_to_mode_reg (mode, op);
25021 args[i].mode = mode;
25027 pat = GEN_FCN (icode) (target, args[0].op);
25030 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25033 gcc_unreachable ();
/* Stores have no value result; loads return TARGET.  */
25039 return klass == store ? 0 : target;
25042 /* Return the integer constant in ARG. Constrain it to be in the range
25043 of the subparts of VEC_TYPE; issue an error if not. */
/* Extract the element-selector constant from ARG and range-check it
   against the subpart count of VEC_TYPE.  On bad input, diagnose via
   error ().  NOTE(review): this listing is elided -- the return
   statements of this function are not visible here.  */
25046 get_element_number (tree vec_type, tree arg)
25048   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25050   if (!host_integerp (arg, 1)
25051       || (elt = tree_low_cst (arg, 1), elt > max))
25053       error ("selector must be an integer constant in the range 0..%wi", max);
25060 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25061 ix86_expand_vector_init. We DO have language-level syntax for this, in
25062 the form of (type){ init-list }. Except that since we can't place emms
25063 instructions from inside the compiler, we can't allow the use of MMX
25064 registers unless the user explicitly asks for it. So we do *not* define
25065 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25066 we have builtins invoked by mmintrin.h that gives us license to emit
25067 these sorts of instructions. */
/* Expand a __builtin_ia32_vec_init_* call described by EXP into TARGET.
   TYPE is the vector result type.  Each call argument becomes one
   element of a PARALLEL handed to ix86_expand_vector_init.  */
25070 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25072   enum machine_mode tmode = TYPE_MODE (type);
25073   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25074   int i, n_elt = GET_MODE_NUNITS (tmode);
25075   rtvec v = rtvec_alloc (n_elt);
25077   gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin must supply exactly one argument per vector element.  */
25078   gcc_assert (call_expr_nargs (exp) == n_elt);
25080   for (i = 0; i < n_elt; ++i)
25082       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
/* Narrow each expanded argument to the element mode.  */
25083       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Force the result into a fresh register if TARGET is unusable.  */
25086   if (!target || !register_operand (target, tmode))
25087     target = gen_reg_rtx (tmode);
25089   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25093 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25094 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25095 had a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_ext_* call: extract element ARG1 (a
   compile-time constant, validated by get_element_number) from the
   vector ARG0, placing the result in TARGET (or a new register).  */
25098 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25100   enum machine_mode tmode, mode0;
25105   arg0 = CALL_EXPR_ARG (exp, 0);
25106   arg1 = CALL_EXPR_ARG (exp, 1);
25108   op0 = expand_normal (arg0);
25109   elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the vector mode of ARG0.  */
25111   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25112   mode0 = TYPE_MODE (TREE_TYPE (arg0));
25113   gcc_assert (VECTOR_MODE_P (mode0));
25115   op0 = force_reg (mode0, op0);
25117   if (optimize || !target || !register_operand (target, tmode))
25118     target = gen_reg_rtx (tmode);
25120   ix86_expand_vector_extract (true, target, op0, elt);
25125 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25126 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25127 a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_set_* call: return a copy of vector ARG0
   with element ARG2 (constant index) replaced by scalar ARG1.  The
   source operand is never modified in place.  */
25130 ix86_expand_vec_set_builtin (tree exp)
25132   enum machine_mode tmode, mode1;
25133   tree arg0, arg1, arg2;
25135   rtx op0, op1, target;
25137   arg0 = CALL_EXPR_ARG (exp, 0);
25138   arg1 = CALL_EXPR_ARG (exp, 1);
25139   arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
25141   tmode = TYPE_MODE (TREE_TYPE (arg0));
25142   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25143   gcc_assert (VECTOR_MODE_P (tmode));
25145   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25146   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25147   elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if it expanded differently.  */
25149   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25150     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25152   op0 = force_reg (tmode, op0);
25153   op1 = force_reg (mode1, op1);
25155   /* OP0 is the source of these builtin functions and shouldn't be
25156      modified. Create a copy, use it and return it as target. */
25157   target = gen_reg_rtx (tmode);
25158   emit_move_insn (target, op0);
25159   ix86_expand_vector_set (true, target, op1, elt);
25164 /* Expand an expression EXP that calls a built-in function,
25165 with result going to TARGET if that's convenient
25166 (and in mode MODE if that's convenient).
25167 SUBTARGET may be used as the target for computing one of EXP's operands.
25168 IGNORE is nonzero if the value is to be ignored. */
/* Main entry point for expanding an ix86 builtin call EXP into RTL.
   Handles a handful of irregular builtins inline, then falls through
   to the generic table-driven expanders (bdesc_special_args,
   bdesc_args, bdesc_comi, bdesc_pcmpestr/pcmpistr, bdesc_multi_arg).
   NOTE(review): this listing is elided; switch/brace/return lines are
   missing between the statements shown.  */
25171 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25172 		     enum machine_mode mode ATTRIBUTE_UNUSED,
25173 		     int ignore ATTRIBUTE_UNUSED)
25175   const struct builtin_description *d;
25177   enum insn_code icode;
25178   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25179   tree arg0, arg1, arg2;
25180   rtx op0, op1, op2, pat;
25181   enum machine_mode mode0, mode1, mode2;
25182   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25184   /* Determine whether the builtin function is available under the current ISA.
25185      Originally the builtin was not created if it wasn't applicable to the
25186      current ISA based on the command line switches. With function specific
25187      options, we need to check in the context of the function making the call
25188      whether it is supported. */
25189   if (ix86_builtins_isa[fcode].isa
25190       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25192       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25193 				       NULL, NULL, false);
25196 	error ("%qE needs unknown isa option", fndecl);
25199 	  gcc_assert (opts != NULL);
25200 	  error ("%qE needs isa option %s", fndecl, opts);
/* MASKMOVQ/MASKMOVDQU take their destination address last in the
   source language but first in the insn pattern.  */
25208     case IX86_BUILTIN_MASKMOVQ:
25209     case IX86_BUILTIN_MASKMOVDQU:
25210       icode = (fcode == IX86_BUILTIN_MASKMOVQ
25211 	       ? CODE_FOR_mmx_maskmovq
25212 	       : CODE_FOR_sse2_maskmovdqu);
25213       /* Note the arg order is different from the operand order. */
25214       arg1 = CALL_EXPR_ARG (exp, 0);
25215       arg2 = CALL_EXPR_ARG (exp, 1);
25216       arg0 = CALL_EXPR_ARG (exp, 2);
25217       op0 = expand_normal (arg0);
25218       op1 = expand_normal (arg1);
25219       op2 = expand_normal (arg2);
25220       mode0 = insn_data[icode].operand[0].mode;
25221       mode1 = insn_data[icode].operand[1].mode;
25222       mode2 = insn_data[icode].operand[2].mode;
25224       op0 = force_reg (Pmode, op0);
25225       op0 = gen_rtx_MEM (mode1, op0);
25227       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25228 	op0 = copy_to_mode_reg (mode0, op0);
25229       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25230 	op1 = copy_to_mode_reg (mode1, op1);
25231       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25232 	op2 = copy_to_mode_reg (mode2, op2);
25233       pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a dedicated stack slot because the insn
   only takes a memory operand.  */
25239     case IX86_BUILTIN_LDMXCSR:
25240       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25241       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25242       emit_move_insn (target, op0);
25243       emit_insn (gen_sse_ldmxcsr (target));
25246     case IX86_BUILTIN_STMXCSR:
25247       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25248       emit_insn (gen_sse_stmxcsr (target));
25249       return copy_to_mode_reg (SImode, target);
25251     case IX86_BUILTIN_CLFLUSH:
25252       arg0 = CALL_EXPR_ARG (exp, 0);
25253       op0 = expand_normal (arg0);
25254       icode = CODE_FOR_sse2_clflush;
25255       if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25256 	op0 = copy_to_mode_reg (Pmode, op0);
25258       emit_insn (gen_sse2_clflush (op0));
25261     case IX86_BUILTIN_MONITOR:
25262       arg0 = CALL_EXPR_ARG (exp, 0);
25263       arg1 = CALL_EXPR_ARG (exp, 1);
25264       arg2 = CALL_EXPR_ARG (exp, 2);
25265       op0 = expand_normal (arg0);
25266       op1 = expand_normal (arg1);
25267       op2 = expand_normal (arg2);
25269 	op0 = copy_to_mode_reg (Pmode, op0);
25271 	op1 = copy_to_mode_reg (SImode, op1);
25273 	op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor is an indirect generator (SI vs. DI address).  */
25274       emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25277     case IX86_BUILTIN_MWAIT:
25278       arg0 = CALL_EXPR_ARG (exp, 0);
25279       arg1 = CALL_EXPR_ARG (exp, 1);
25280       op0 = expand_normal (arg0);
25281       op1 = expand_normal (arg1);
25283 	op0 = copy_to_mode_reg (SImode, op0);
25285 	op1 = copy_to_mode_reg (SImode, op1);
25286       emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to the helpers above.  */
25289     case IX86_BUILTIN_VEC_INIT_V2SI:
25290     case IX86_BUILTIN_VEC_INIT_V4HI:
25291     case IX86_BUILTIN_VEC_INIT_V8QI:
25292       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25294     case IX86_BUILTIN_VEC_EXT_V2DF:
25295     case IX86_BUILTIN_VEC_EXT_V2DI:
25296     case IX86_BUILTIN_VEC_EXT_V4SF:
25297     case IX86_BUILTIN_VEC_EXT_V4SI:
25298     case IX86_BUILTIN_VEC_EXT_V8HI:
25299     case IX86_BUILTIN_VEC_EXT_V2SI:
25300     case IX86_BUILTIN_VEC_EXT_V4HI:
25301     case IX86_BUILTIN_VEC_EXT_V16QI:
25302       return ix86_expand_vec_ext_builtin (exp, target);
25304     case IX86_BUILTIN_VEC_SET_V2DI:
25305     case IX86_BUILTIN_VEC_SET_V4SF:
25306     case IX86_BUILTIN_VEC_SET_V4SI:
25307     case IX86_BUILTIN_VEC_SET_V8HI:
25308     case IX86_BUILTIN_VEC_SET_V4HI:
25309     case IX86_BUILTIN_VEC_SET_V16QI:
25310       return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant through memory.  */
25312     case IX86_BUILTIN_INFQ:
25313     case IX86_BUILTIN_HUGE_VALQ:
25315 	REAL_VALUE_TYPE inf;
25319 	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25321 	tmp = validize_mem (force_const_mem (mode, tmp));
25324 	  target = gen_reg_rtx (mode);
25326 	emit_move_insn (target, tmp);
/* Fall through to the table-driven expanders for everything else.  */
25334   for (i = 0, d = bdesc_special_args;
25335        i < ARRAY_SIZE (bdesc_special_args);
25337     if (d->code == fcode)
25338       return ix86_expand_special_args_builtin (d, exp, target);
25340   for (i = 0, d = bdesc_args;
25341        i < ARRAY_SIZE (bdesc_args);
25343     if (d->code == fcode)
25346 	  case IX86_BUILTIN_FABSQ:
25347 	  case IX86_BUILTIN_COPYSIGNQ:
25349 	    /* Emit a normal call if SSE2 isn't available. */
25350 	    return expand_call (exp, target, ignore);
25352 	    return ix86_expand_args_builtin (d, exp, target);
25355   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25356     if (d->code == fcode)
25357       return ix86_expand_sse_comi (d, exp, target);
25359   for (i = 0, d = bdesc_pcmpestr;
25360        i < ARRAY_SIZE (bdesc_pcmpestr);
25362     if (d->code == fcode)
25363       return ix86_expand_sse_pcmpestr (d, exp, target);
25365   for (i = 0, d = bdesc_pcmpistr;
25366        i < ARRAY_SIZE (bdesc_pcmpistr);
25368     if (d->code == fcode)
25369       return ix86_expand_sse_pcmpistr (d, exp, target);
25371   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25372     if (d->code == fcode)
25373       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25374 					    (enum multi_arg_type)d->flag,
/* Every valid fcode must have been handled by now.  */
25377   gcc_unreachable ();
25380 /* Returns a function decl for a vectorized version of the builtin function
25381 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25382 if it is not available. */
/* Map a scalar builtin FN to its vectorized ix86 builtin, given the
   vector result type TYPE_OUT and input type TYPE_IN.  Returns the
   decl, or (per the header comment above) NULL_TREE when no mapping
   applies -- the NULL returns themselves are elided in this listing.  */
25385 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25388   enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
25391   if (TREE_CODE (type_out) != VECTOR_TYPE
25392       || TREE_CODE (type_in) != VECTOR_TYPE)
25395   out_mode = TYPE_MODE (TREE_TYPE (type_out));
25396   out_n = TYPE_VECTOR_SUBPARTS (type_out);
25397   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25398   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25402     case BUILT_IN_SQRT:
25403       if (out_mode == DFmode && out_n == 2
25404 	  && in_mode == DFmode && in_n == 2)
25405 	return ix86_builtins[IX86_BUILTIN_SQRTPD];
25408     case BUILT_IN_SQRTF:
25409       if (out_mode == SFmode && out_n == 4
25410 	  && in_mode == SFmode && in_n == 4)
25411 	return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* lrint on V2DF produces V4SI via the pack-with-fix builtin.  */
25414     case BUILT_IN_LRINT:
25415       if (out_mode == SImode && out_n == 4
25416 	  && in_mode == DFmode && in_n == 2)
25417 	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25420     case BUILT_IN_LRINTF:
25421       if (out_mode == SImode && out_n == 4
25422 	  && in_mode == SFmode && in_n == 4)
25423 	return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25430 /* Dispatch to a handler for a vectorization library. */
25431 if (ix86_veclib_handler)
25432 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25438 /* Handler for an SVML-style interface to
25439 a library with vectorized intrinsics. */
/* Build a decl for the SVML (Intel short-vector math library) variant
   of scalar builtin FN for vector types TYPE_OUT/TYPE_IN, or bail out
   when the combination is unsupported.  Names follow the "vmls<fn>4" /
   "vmld<fn>2" scheme, with two irregular log spellings.  */
25442 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25445   tree fntype, new_fndecl, args;
25448   enum machine_mode el_mode, in_mode;
25451   /* The SVML is suitable for unsafe math only. */
25452   if (!flag_unsafe_math_optimizations)
25455   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25456   n = TYPE_VECTOR_SUBPARTS (type_out);
25457   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25458   in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must match.  */
25459   if (el_mode != in_mode
/* Double-precision entry points: require V2DF.  */
25467     case BUILT_IN_LOG10:
25469     case BUILT_IN_TANH:
25471     case BUILT_IN_ATAN:
25472     case BUILT_IN_ATAN2:
25473     case BUILT_IN_ATANH:
25474     case BUILT_IN_CBRT:
25475     case BUILT_IN_SINH:
25477     case BUILT_IN_ASINH:
25478     case BUILT_IN_ASIN:
25479     case BUILT_IN_COSH:
25481     case BUILT_IN_ACOSH:
25482     case BUILT_IN_ACOS:
25483       if (el_mode != DFmode || n != 2)
/* Single-precision entry points: require V4SF.  */
25487     case BUILT_IN_EXPF:
25488     case BUILT_IN_LOGF:
25489     case BUILT_IN_LOG10F:
25490     case BUILT_IN_POWF:
25491     case BUILT_IN_TANHF:
25492     case BUILT_IN_TANF:
25493     case BUILT_IN_ATANF:
25494     case BUILT_IN_ATAN2F:
25495     case BUILT_IN_ATANHF:
25496     case BUILT_IN_CBRTF:
25497     case BUILT_IN_SINHF:
25498     case BUILT_IN_SINF:
25499     case BUILT_IN_ASINHF:
25500     case BUILT_IN_ASINF:
25501     case BUILT_IN_COSHF:
25502     case BUILT_IN_COSF:
25503     case BUILT_IN_ACOSHF:
25504     case BUILT_IN_ACOSF:
25505       if (el_mode != SFmode || n != 4)
/* Derive the SVML name from the builtin's own name ("__builtin_" is
   10 chars, hence the bname+10 offsets).  log/logf use irregular
   "Ln" spellings.  */
25513   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25515   if (fn == BUILT_IN_LOGF)
25516     strcpy (name, "vmlsLn4");
25517   else if (fn == BUILT_IN_LOG)
25518     strcpy (name, "vmldLn2");
25521       sprintf (name, "vmls%s", bname+10);
/* Float variants drop the trailing 'f' and append the lane count.  */
25522       name[strlen (name)-1] = '4';
25525     sprintf (name, "vmld%s2", bname+10);
25527   /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-ary fntype.  */
25531   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25532        args = TREE_CHAIN (args))
25536     fntype = build_function_type_list (type_out, type_in, NULL);
25538     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25540   /* Build a function declaration for the vectorized function. */
25541   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25542   TREE_PUBLIC (new_fndecl) = 1;
25543   DECL_EXTERNAL (new_fndecl) = 1;
25544   DECL_IS_NOVOPS (new_fndecl) = 1;
25545   TREE_READONLY (new_fndecl) = 1;
25550 /* Handler for an ACML-style interface to
25551 a library with vectorized intrinsics. */
/* Build a decl for the ACML (AMD core math library) vector variant of
   scalar builtin FN, analogous to ix86_veclibabi_svml above but with
   the "__vrd2_" / "__vrs4_" naming scheme filled into NAME's ".."
   placeholder slots.  */
25554 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25556   char name[20] = "__vr.._";
25557   tree fntype, new_fndecl, args;
25560   enum machine_mode el_mode, in_mode;
25563   /* The ACML is 64bits only and suitable for unsafe math only as
25564      it does not correctly support parts of IEEE with the required
25565      precision such as denormals. */
25567       || !flag_unsafe_math_optimizations)
25570   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25571   n = TYPE_VECTOR_SUBPARTS (type_out);
25572   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25573   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25574   if (el_mode != in_mode
/* Double-precision group: require V2DF.  */
25584     case BUILT_IN_LOG2:
25585     case BUILT_IN_LOG10:
25588       if (el_mode != DFmode
/* Single-precision group: require V4SF.  */
25593     case BUILT_IN_SINF:
25594     case BUILT_IN_COSF:
25595     case BUILT_IN_EXPF:
25596     case BUILT_IN_POWF:
25597     case BUILT_IN_LOGF:
25598     case BUILT_IN_LOG2F:
25599     case BUILT_IN_LOG10F:
25602       if (el_mode != SFmode
/* Splice the builtin's bare name (past "__builtin_", 10 chars) after
   the 7-char "__vrXN_" prefix.  */
25611   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25612   sprintf (name + 7, "%s", bname+10);
25615   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25616        args = TREE_CHAIN (args))
25620     fntype = build_function_type_list (type_out, type_in, NULL);
25622     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25624   /* Build a function declaration for the vectorized function. */
25625   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25626   TREE_PUBLIC (new_fndecl) = 1;
25627   DECL_EXTERNAL (new_fndecl) = 1;
25628   DECL_IS_NOVOPS (new_fndecl) = 1;
25629   TREE_READONLY (new_fndecl) = 1;
25635 /* Returns a decl of a function that implements conversion of an integer vector
25636 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25637 side of the conversion.
25638 Return NULL_TREE if it is not available. */
/* Return the builtin decl implementing int<->float vector conversion
   CODE for vector TYPE (the integer side), or NULL_TREE (see header
   comment above; the NULL returns are elided in this listing).  */
25641 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25643   if (TREE_CODE (type) != VECTOR_TYPE
25644       /* There are only conversions from/to signed integers. */
25645       || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction.  */
25651       switch (TYPE_MODE (type))
25654 	  return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction.  */
25659     case FIX_TRUNC_EXPR:
25660       switch (TYPE_MODE (type))
25663 	  return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25673 /* Returns a code for a target-specific builtin that implements
25674 reciprocal of the function, or NULL_TREE if not available. */
/* Return the decl of the rsqrt-approximation builtin to substitute
   for sqrt builtin FN, when -mrecip math is fully enabled; MD_FN
   selects machine-dependent vs. normal builtin namespaces.  */
25677 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25678 			 bool sqrt ATTRIBUTE_UNUSED)
/* The approximation is only valid under the full unsafe-math set.  */
25680   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25681 	 && flag_finite_math_only && !flag_trapping_math
25682 	 && flag_unsafe_math_optimizations))
25686   /* Machine dependent builtins. */
25689       /* Vectorized version of sqrt to rsqrt conversion. */
25690       case IX86_BUILTIN_SQRTPS_NR:
25691 	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25697     /* Normal builtins. */
25700       /* Sqrt to rsqrt conversion. */
25701       case BUILT_IN_SQRTF:
25702 	return ix86_builtins[IX86_BUILTIN_RSQRTF];
25709 /* Store OPERAND to the memory after reload is completed. This means
25710 that we can't easily use assign_stack_local. */
/* Store OPERAND (mode MODE) to stack memory after reload, returning a
   MEM for it.  Uses the red zone when available, otherwise pushes via
   PRE_DEC of the stack pointer (paired with ix86_free_from_memory).  */
25712 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25716   gcc_assert (reload_completed);
/* Red zone available: store below the stack pointer, no adjustment.  */
25717   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25719       result = gen_rtx_MEM (mode,
25720 			    gen_rtx_PLUS (Pmode,
25722 					  GEN_INT (-RED_ZONE_SIZE)));
25723       emit_move_insn (result, operand);
/* 64-bit without red zone: push everything as one DImode slot.  */
25725   else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25731 	    operand = gen_lowpart (DImode, operand);
25735 	    gen_rtx_SET (VOIDmode,
25736 			 gen_rtx_MEM (DImode,
25737 				      gen_rtx_PRE_DEC (DImode,
25738 						       stack_pointer_rtx)),
25742 	    gcc_unreachable ();
25744       result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode is pushed as two SImode halves (high first).  */
25753 	    split_di (&operand, 1, operands, operands + 1);
25755 	      gen_rtx_SET (VOIDmode,
25756 			   gen_rtx_MEM (SImode,
25757 					gen_rtx_PRE_DEC (Pmode,
25758 							 stack_pointer_rtx)),
25761 	      gen_rtx_SET (VOIDmode,
25762 			   gen_rtx_MEM (SImode,
25763 					gen_rtx_PRE_DEC (Pmode,
25764 							 stack_pointer_rtx)),
25769 	    /* Store HImodes as SImodes. */
25770 	    operand = gen_lowpart (SImode, operand);
25774 	    gen_rtx_SET (VOIDmode,
25775 			 gen_rtx_MEM (GET_MODE (operand),
25776 				      gen_rtx_PRE_DEC (SImode,
25777 						       stack_pointer_rtx)),
25781 	    gcc_unreachable ();
25783       result = gen_rtx_MEM (mode, stack_pointer_rtx);
25788 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the temporary slot of mode MODE off
   the stack.  A no-op when the red zone was used (nothing was
   allocated then).  */
25790 ix86_free_from_memory (enum machine_mode mode)
25792   if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
/* DImode (or any mode on 64-bit) occupied a full 8-byte slot.  */
25796       if (mode == DImode || TARGET_64BIT)
25800       /* Use LEA to deallocate stack space. In peephole2 it will be converted
25801 	 to pop or add instruction if registers are available. */
25802       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25803 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25808 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25809 QImode must go into class Q_REGS.
25810 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25811 movdf to do mem-to-mem moves through integer regs. */
/* PREFERRED_RELOAD_CLASS hook: narrow REGCLASS for loading X, per the
   policy in the header comment above.  Returns a subclass of REGCLASS
   (or NO_REGS to force X into memory).  */
25813 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25815   enum machine_mode mode = GET_MODE (x);
25817   /* We're only allowed to return a subclass of CLASS. Many of the
25818      following checks fail for NO_REGS, so eliminate that early. */
25819   if (regclass == NO_REGS)
25822   /* All classes can load zeros. */
25823   if (x == CONST0_RTX (mode))
25826   /* Force constants into memory if we are loading a (nonzero) constant into
25827      an MMX or SSE register. This is because there are no MMX/SSE instructions
25828      to load from a constant. */
25830       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25833   /* Prefer SSE regs only, if we can use them for math. */
25834   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25835     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25837   /* Floating-point constants need more complex checks. */
25838   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25840       /* General regs can load everything. */
25841       if (reg_class_subset_p (regclass, GENERAL_REGS))
25844       /* Floats can load 0 and 1 plus some others. Note that we eliminated
25845 	 zero above. We only want to wind up preferring 80387 registers if
25846 	 we plan on doing computation with them. */
25848 	  && standard_80387_constant_p (x))
25850 	  /* Limit class to non-sse. */
25851 	  if (regclass == FLOAT_SSE_REGS)
25853 	  if (regclass == FP_TOP_SSE_REGS)
25855 	  if (regclass == FP_SECOND_SSE_REGS)
25856 	    return FP_SECOND_REG;
25857 	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25864   /* Generally when we see PLUS here, it's the function invariant
25865      (plus soft-fp const_int). Which can only be computed into general
25867   if (GET_CODE (x) == PLUS)
25868     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25870   /* QImode constants are easy to load, but non-constant QImode data
25871      must go into Q_REGS. */
25872   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25874       if (reg_class_subset_p (regclass, Q_REGS))
25876       if (reg_class_subset_p (Q_REGS, regclass))
25884 /* Discourage putting floating-point values in SSE registers unless
25885 SSE math is being used, and likewise for the 387 registers. */
/* PREFERRED_OUTPUT_RELOAD_CLASS hook: restrict the output reload class
   of X to the unit (SSE vs. x87) that the current math mode targets.  */
25887 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25889   enum machine_mode mode = GET_MODE (x);
25891   /* Restrict the output reload class to the register bank that we are doing
25892      math on. If we would like not to return a subset of CLASS, reject this
25893      alternative: if reload cannot do this, it will still use its choice. */
25894   mode = GET_MODE (x);
25895   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25896     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25898   if (X87_FLOAT_MODE_P (mode))
/* Mixed classes are narrowed to their x87 component.  */
25900       if (regclass == FP_TOP_SSE_REGS)
25902       else if (regclass == FP_SECOND_SSE_REGS)
25903 	return FP_SECOND_REG;
25905       return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook.  */
25911 static enum reg_class
25912 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25913 		       enum machine_mode mode,
25914 		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
25916   /* QImode spills from non-QI registers require
25917      intermediate register on 32bit targets. */
25918   if (!in_p && mode == QImode && !TARGET_64BIT
25919       && (rclass == GENERAL_REGS
25920 	  || rclass == LEGACY_REGS
25921 	  || rclass == INDEX_REGS))
/* Resolve pseudos and SUBREGs to the underlying hard register.  */
25930       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25931 	regno = true_regnum (x);
25933       /* Return Q_REGS if the operand is in memory. */
25941 /* If we are copying between general and FP registers, we need a memory
25942 location. The same is true for SSE and MMX registers.
25944 To optimize register_move_cost performance, allow inline variant.
25946 The macro can't work reliably when one of the CLASSES is class containing
25947 registers from multiple units (SSE, MMX, integer). We avoid this by never
25948 combining those units in single alternative in the machine description.
25949 Ensure that this constraint holds to avoid unexpected surprises.
25951 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25952 enforce these sanity checks. */
/* Core of SECONDARY_MEMORY_NEEDED; see the long comment above for the
   STRICT semantics.  Nonzero means a move between CLASS1 and CLASS2 in
   MODE must go through memory.  */
25955 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25956 				enum machine_mode mode, int strict)
/* Sanity: no class may mix units (SSE/MMX/x87/integer); see comment.  */
25958   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25959       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25960       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25961       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25962       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25963       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25965       gcc_assert (!strict);
/* x87 <-> non-x87 always goes through memory.  */
25969   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25972   /* ??? This is a lie. We do have moves between mmx/general, and for
25973      mmx/sse2. But by saying we need secondary memory we discourage the
25974      register allocator from using the mmx registers unless needed. */
25975   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25978   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25980       /* SSE1 doesn't have any direct moves from other classes. */
25984       /* If the target says that inter-unit moves are more expensive
25985 	 than moving through memory, then don't generate them. */
25986       if (!TARGET_INTER_UNIT_MOVES)
25989       /* Between SSE and general, we have moves no larger than word size. */
25990       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used by
   the SECONDARY_MEMORY_NEEDED target macro.  */
25998 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25999 			      enum machine_mode mode, int strict)
26001   return inline_secondary_memory_needed (class1, class2, mode, strict);
26004 /* Return true if the registers in CLASS cannot represent the change from
26005 modes FROM to TO. */
/* CANNOT_CHANGE_MODE_CLASS hook: nonzero if REGCLASS cannot change a
   value's mode from FROM to TO via subreg.  */
26008 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26009 			       enum reg_class regclass)
26014   /* x87 registers can't do subreg at all, as all values are reformatted
26015      to extended precision. */
26016   if (MAYBE_FLOAT_CLASS_P (regclass))
26019   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26021       /* Vector registers do not support QI or HImode loads. If we don't
26022 	 disallow a change to these modes, reload will assume it's ok to
26023 	 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26024 	 the vec_dupv4hi pattern. */
26025       if (GET_MODE_SIZE (from) < 4)
26028       /* Vector registers do not support subreg with nonzero offsets, which
26029 	 are otherwise valid for integer registers. Since we can't see
26030 	 whether we have a nonzero offset from here, prohibit all
26031 	 nonparadoxical subregs changing size. */
26032       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26039 /* Return the cost of moving data of mode M between a
26040 register and memory. A value of 2 is the default; this cost is
26041 relative to those in `REGISTER_MOVE_COST'.
26043 This function is used extensively by register_move_cost that is used to
26044 build tables at startup. Make it inline in this case.
26045 When IN is 2, return maximum of in and out move cost.
26047 If moving between registers and memory is more expensive than
26048 between two registers, you should define this macro to express the
26051 Model also increased moving costs of QImode registers in non
/* Core of MEMORY_MOVE_COST (see the long comment above): cost of
   moving MODE between REGCLASS and memory; IN==2 means max of
   load/store cost.  */
26055 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87: index load/store cost tables by MODE_INDEX.  */
26059   if (FLOAT_CLASS_P (regclass))
26077 	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26078       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE: table index derived from operand size.  */
26080   if (SSE_CLASS_P (regclass))
26083       switch (GET_MODE_SIZE (mode))
26098 	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26099       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26101   if (MMX_CLASS_P (regclass))
26104       switch (GET_MODE_SIZE (mode))
26116 	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26117       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General registers, by operand size.  */
26119   switch (GET_MODE_SIZE (mode))
/* Byte ops: model partial-register-stall penalties for non-Q regs.  */
26122       if (Q_CLASS_P (regclass) || TARGET_64BIT)
26125 	    return ix86_cost->int_store[0];
26126 	  if (TARGET_PARTIAL_REG_DEPENDENCY
26127 	      && optimize_function_for_speed_p (cfun))
26128 	    cost = ix86_cost->movzbl_load;
26130 	    cost = ix86_cost->int_load[0];
26132 	    return MAX (cost, ix86_cost->int_store[0]);
26138 	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26140 	    return ix86_cost->movzbl_load;
26142 	    return ix86_cost->int_store[0] + 4;
26147 	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26148       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26150       /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26151       if (mode == TFmode)
26154 	cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26156 	cost = ix86_cost->int_load[2];
26158 	cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
26159       return (cost * (((int) GET_MODE_SIZE (mode)
26160 		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around inline_memory_move_cost, used by the
   MEMORY_MOVE_COST target macro.  */
26165 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26167   return inline_memory_move_cost (mode, regclass, in);
26171 /* Return the cost of moving data from a register in class CLASS1 to
26172 one in class CLASS2.
26174 It is not required that the cost always equal 2 when FROM is the same as TO;
26175 on some machines it is expensive to move between registers if they are not
26176 general registers. */
/* REGISTER_MOVE_COST hook: cost of moving MODE from CLASS1 to CLASS2
   (see comment above).  */
26179 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26180 			 enum reg_class class2)
26182   /* In case we require secondary memory, compute cost of the store followed
26183      by load. In order to avoid bad register allocation choices, we need
26184      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26186   if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN==2 asks for the max of load and store cost in each direction.  */
26190       cost += inline_memory_move_cost (mode, class1, 2);
26191       cost += inline_memory_move_cost (mode, class2, 2);
26193       /* In case of copying from general_purpose_register we may emit multiple
26194 	 stores followed by single load causing memory size mismatch stall.
26195 	 Count this as arbitrarily high cost of 20. */
26196       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26199   /* In the case of FP/MMX moves, the registers actually overlap, and we
26200      have to switch modes in order to treat them differently. */
26201   if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26202       || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26208   /* Moves between SSE/MMX and integer unit are expensive. */
26209   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26210       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26212     /* ??? By keeping returned value relatively high, we limit the number
26213        of moves between integer and MMX/SSE registers for all targets.
26214        Additionally, high value prevents problem with x86_modes_tieable_p(),
26215        where integer modes in MMX/SSE registers are not tieable
26216        because of missing QImode and HImode moves to, from or between
26217        MMX/SSE registers. */
26218     return MAX (8, ix86_cost->mmxsse_to_integer);
26220   if (MAYBE_FLOAT_CLASS_P (class1))
26221     return ix86_cost->fp_move;
26222   if (MAYBE_SSE_CLASS_P (class1))
26223     return ix86_cost->sse_move;
26224   if (MAYBE_MMX_CLASS_P (class1))
26225     return ix86_cost->mmx_move;
26229 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* HARD_REGNO_MODE_OK hook: nonzero if hard register REGNO can hold a
   value of mode MODE.  */
26232 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26234   /* Flags and only flags can only hold CCmode values. */
26235   if (CC_REGNO_P (regno))
26236     return GET_MODE_CLASS (mode) == MODE_CC;
26237   if (GET_MODE_CLASS (mode) == MODE_CC
26238       || GET_MODE_CLASS (mode) == MODE_RANDOM
26239       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26241   if (FP_REGNO_P (regno))
26242     return VALID_FP_MODE_P (mode);
26243   if (SSE_REGNO_P (regno))
26245       /* We implement the move patterns for all vector modes into and
26246 	 out of SSE registers, even when no operation instructions
26247 	 are available. OImode move is available only when AVX is
26249       return ((TARGET_AVX && mode == OImode)
26250 	      || VALID_AVX256_REG_MODE (mode)
26251 	      || VALID_SSE_REG_MODE (mode)
26252 	      || VALID_SSE2_REG_MODE (mode)
26253 	      || VALID_MMX_REG_MODE (mode)
26254 	      || VALID_MMX_REG_MODE_3DNOW (mode));
26256   if (MMX_REGNO_P (regno))
26258       /* We implement the move patterns for 3DNOW modes even in MMX mode,
26259 	 so if the register is available at all, then we can move data of
26260 	 the given mode into or out of it. */
26261       return (VALID_MMX_REG_MODE (mode)
26262 	      || VALID_MMX_REG_MODE_3DNOW (mode));
26265   if (mode == QImode)
26267       /* Take care for QImode values - they can be in non-QI regs,
26268 	 but then they do cause partial register stalls. */
26269       if (regno <= BX_REG || TARGET_64BIT)
26271       if (!TARGET_PARTIAL_REG_STALL)
26273       return reload_in_progress || reload_completed;
26275   /* We handle both integer and floats in the general purpose registers. */
26276   else if (VALID_INT_MODE_P (mode))
26278   else if (VALID_FP_MODE_P (mode))
26280   else if (VALID_DFP_MODE_P (mode))
26282   /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26283      on to use that value in smaller contexts, this can easily force a
26284      pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26285      supporting DImode, allow it. */
26286   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26292 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26293 tieable integer mode. */
26296 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* NOTE(review): the body is a switch over MODE whose `case` labels are
   elided in this listing; only two of its return statements survive.
   HImode ties everywhere except when partial-register stalls matter;
   the second return presumably covers the 64-bit-only modes.  */
26305 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26308 return TARGET_64BIT;
26315 /* Return true if MODE1 is accessible in a register that can hold MODE2
26316 without copying. That is, all register classes that can hold MODE2
26317 can also hold MODE1. */
26320 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* Identical modes always tie (the `return true;` bodies for the first two
   tests are elided in this listing).  */
26322 if (mode1 == mode2)
26325 if (ix86_tieable_integer_mode_p (mode1)
26326 && ix86_tieable_integer_mode_p (mode2))
26329 /* MODE2 being XFmode implies fp stack or general regs, which means we
26330 can tie any smaller floating point modes to it. Note that we do not
26331 tie this with TFmode. */
26332 if (mode2 == XFmode)
26333 return mode1 == SFmode || mode1 == DFmode;
26335 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26336 that we can tie it with SFmode. */
26337 if (mode2 == DFmode)
26338 return mode1 == SFmode;
26340 /* If MODE2 is only appropriate for an SSE register, then tie with
26341 any other mode acceptable to SSE registers. */
26342 if (GET_MODE_SIZE (mode2) == 16
26343 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26344 return (GET_MODE_SIZE (mode1) == 16
26345 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26347 /* If MODE2 is appropriate for an MMX register, then tie
26348 with any other mode acceptable to MMX registers. */
26349 if (GET_MODE_SIZE (mode2) == 8
26350 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26351 return (GET_MODE_SIZE (mode1) == 8
26352 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26357 /* Compute a (partial) cost for rtx X. Return true if the complete
26358 cost has been computed, and false if subexpressions should be
26359 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this is the TARGET_RTX_COSTS hook; the enclosing
   `switch (code)` and its `case`/`break`/`return` lines are elided in
   this listing, so each group below corresponds to one rtx-code case.  */
26362 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26364 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26365 enum machine_mode mode = GET_MODE (x);
/* Size tuning uses the byte-oriented cost table; speed uses the CPU one.  */
26366 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* CONST_INT / CONST / LABEL_REF / SYMBOL_REF: immediates that need extra
   instructions (64-bit wide constants, PIC symbols) cost more.  */
26374 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26376 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26378 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` looks wrong — `!GET_CODE (x)`
   is an int negation, so the comparison is against 0/1, not LABEL_REF.
   Likely intended `GET_CODE (x) != LABEL_REF`; confirm against upstream.  */
26380 || (!GET_CODE (x) != LABEL_REF
26381 && (GET_CODE (x) != SYMBOL_REF
26382 || !SYMBOL_REF_LOCAL_P (x)))))
/* CONST_DOUBLE: cheap if it is a standard 80387 constant (fldz etc.).  */
26389 if (mode == VOIDmode)
26392 switch (standard_80387_constant_p (x))
26397 default: /* Other constants */
26402 /* Start with (MEM (SYMBOL_REF)), since that's where
26403 it'll probably end up. Add a penalty for size. */
26404 *total = (COSTS_N_INSNS (1)
26405 + (flag_pic != 0 && !TARGET_64BIT)
26406 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* ZERO_EXTEND case.  */
26412 /* The zero extensions is often completely free on x86_64, so make
26413 it as cheap as possible. */
26414 if (TARGET_64BIT && mode == DImode
26415 && GET_MODE (XEXP (x, 0)) == SImode)
26417 else if (TARGET_ZERO_EXTEND_WITH_AND)
26418 *total = cost->add;
26420 *total = cost->movzx;
/* SIGN_EXTEND case.  */
26424 *total = cost->movsx;
/* ASHIFT case: shifts by small constants may be done with lea.  */
26428 if (CONST_INT_P (XEXP (x, 1))
26429 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26431 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26434 *total = cost->add;
26437 if ((value == 2 || value == 3)
26438 && cost->lea <= cost->shift_const)
26440 *total = cost->lea;
/* ASHIFTRT/LSHIFTRT/ROTATE/ROTATERT: DImode shifts on 32-bit need pairs.  */
26450 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26452 if (CONST_INT_P (XEXP (x, 1)))
26454 if (INTVAL (XEXP (x, 1)) > 32)
26455 *total = cost->shift_const + COSTS_N_INSNS (2);
26457 *total = cost->shift_const * 2;
26461 if (GET_CODE (XEXP (x, 1)) == AND)
26462 *total = cost->shift_var * 2;
26464 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26469 if (CONST_INT_P (XEXP (x, 1)))
26470 *total = cost->shift_const;
26472 *total = cost->shift_var;
/* MULT case: FP multiply first, then integer multiply below.  */
26477 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26479 /* ??? SSE scalar cost should be used here. */
26480 *total = cost->fmul;
26483 else if (X87_FLOAT_MODE_P (mode))
26485 *total = cost->fmul;
26488 else if (FLOAT_MODE_P (mode))
26490 /* ??? SSE vector cost should be used here. */
26491 *total = cost->fmul;
26496 rtx op0 = XEXP (x, 0);
26497 rtx op1 = XEXP (x, 1);
/* nbits = population count of the constant multiplier (popcount loop);
   used to scale the per-bit multiply cost.  */
26499 if (CONST_INT_P (XEXP (x, 1)))
26501 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26502 for (nbits = 0; value != 0; value &= value - 1)
26506 /* This is arbitrary. */
26509 /* Compute costs correctly for widening multiplication. */
26510 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26511 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26512 == GET_MODE_SIZE (mode))
26514 int is_mulwiden = 0;
26515 enum machine_mode inner_mode = GET_MODE (op0);
26517 if (GET_CODE (op0) == GET_CODE (op1))
26518 is_mulwiden = 1, op1 = XEXP (op1, 0);
26519 else if (CONST_INT_P (op1))
26521 if (GET_CODE (op0) == SIGN_EXTEND)
26522 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26525 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26529 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26532 *total = (cost->mult_init[MODE_INDEX (mode)]
26533 + nbits * cost->mult_bit
26534 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/UDIV/MOD/UMOD case.  */
26543 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26544 /* ??? SSE cost should be used here. */
26545 *total = cost->fdiv;
26546 else if (X87_FLOAT_MODE_P (mode))
26547 *total = cost->fdiv;
26548 else if (FLOAT_MODE_P (mode))
26549 /* ??? SSE vector cost should be used here. */
26550 *total = cost->fdiv;
26552 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS case: recognize address-like shapes that a single lea covers.  */
26556 if (GET_MODE_CLASS (mode) == MODE_INT
26557 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26559 if (GET_CODE (XEXP (x, 0)) == PLUS
26560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26561 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26562 && CONSTANT_P (XEXP (x, 1)))
26564 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26565 if (val == 2 || val == 4 || val == 8)
26567 *total = cost->lea;
26568 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26569 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26570 outer_code, speed);
26571 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26575 else if (GET_CODE (XEXP (x, 0)) == MULT
26576 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26578 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26579 if (val == 2 || val == 4 || val == 8)
26581 *total = cost->lea;
26582 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26583 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26587 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26589 *total = cost->lea;
26590 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26591 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26592 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* MINUS (and PLUS fall-through) case: FP add costs.  */
26599 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26601 /* ??? SSE cost should be used here. */
26602 *total = cost->fadd;
26605 else if (X87_FLOAT_MODE_P (mode))
26607 *total = cost->fadd;
26610 else if (FLOAT_MODE_P (mode))
26612 /* ??? SSE vector cost should be used here. */
26613 *total = cost->fadd;
/* AND/IOR/XOR: DImode logic on 32-bit takes two insns; the shift doubles
   the operand cost when the operand is not itself DImode.  */
26621 if (!TARGET_64BIT && mode == DImode)
26623 *total = (cost->add * 2
26624 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26625 << (GET_MODE (XEXP (x, 0)) != DImode))
26626 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26627 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG case: FP negation costs.  */
26633 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26635 /* ??? SSE cost should be used here. */
26636 *total = cost->fchs;
26639 else if (X87_FLOAT_MODE_P (mode))
26641 *total = cost->fchs;
26644 else if (FLOAT_MODE_P (mode))
26646 /* ??? SSE vector cost should be used here. */
26647 *total = cost->fchs;
/* NOT case.  */
26653 if (!TARGET_64BIT && mode == DImode)
26654 *total = cost->add * 2;
26656 *total = cost->add;
/* COMPARE case: single-bit extract against zero maps to test[bwl].  */
26660 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26661 && XEXP (XEXP (x, 0), 1) == const1_rtx
26662 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26663 && XEXP (x, 1) == const0_rtx)
26665 /* This kind of construct is implemented using test[bwl].
26666 Treat it as if we had an AND. */
26667 *total = (cost->add
26668 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26669 + rtx_cost (const1_rtx, outer_code, speed));
/* FLOAT_EXTEND (presumably): free for SSE math.  */
26675 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS case.  */
26680 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26681 /* ??? SSE cost should be used here. */
26682 *total = cost->fabs;
26683 else if (X87_FLOAT_MODE_P (mode))
26684 *total = cost->fabs;
26685 else if (FLOAT_MODE_P (mode))
26686 /* ??? SSE vector cost should be used here. */
26687 *total = cost->fabs;
/* SQRT case.  */
26691 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26692 /* ??? SSE cost should be used here. */
26693 *total = cost->fsqrt;
26694 else if (X87_FLOAT_MODE_P (mode))
26695 *total = cost->fsqrt;
26696 else if (FLOAT_MODE_P (mode))
26697 /* ??? SSE vector cost should be used here. */
26698 *total = cost->fsqrt;
/* UNSPEC case: thread-pointer reads are effectively free.  */
26702 if (XINT (x, 1) == UNSPEC_TP)
/* Counter for generating unique Mach-O lazy-pointer/stub labels.  */
26713 static int current_machopic_label_num;
26715 /* Given a symbol name and its associated stub, write out the
26716 definition of the stub. */
/* Darwin-only (inside TARGET_MACHO in the full file); emits the 32-bit
   lazy-binding stub, binder, and lazy-pointer sections for SYMB.  */
26719 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26721 unsigned int length;
26722 char *binder_name, *symbol_name, lazy_ptr_name[32];
26723 int label = ++current_machopic_label_num;
26725 /* For 64-bit we shouldn't get here. */
26726 gcc_assert (!TARGET_64BIT);
26728 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26729 symb = (*targetm.strip_name_encoding) (symb);
26731 length = strlen (stub);
26732 binder_name = XALLOCAVEC (char, length + 32);
26733 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26735 length = strlen (symb);
26736 symbol_name = XALLOCAVEC (char, length + 32);
26737 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26739 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC vs non-PIC stub sections (the `if (MACHOPIC_PURE)`-style guard
   between these two calls is elided in this listing).  */
26742 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26744 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26746 fprintf (file, "%s:\n", stub);
26747 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax, load the lazy pointer, jump.  */
26751 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26752 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26753 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC stub: jump through the lazy pointer directly.  */
26756 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26758 fprintf (file, "%s:\n", binder_name);
26762 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26763 fprintf (file, "\tpushl\t%%eax\n");
26766 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26768 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer initially points at the binder so the first call binds.  */
26770 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26771 fprintf (file, "%s:\n", lazy_ptr_name);
26772 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26773 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86; delegates to the generic
   Darwin end-of-file handling (additional lines elided in this listing).  */
26777 darwin_x86_file_end (void)
26779 darwin_file_end ();
26782 #endif /* TARGET_MACHO */
26784 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87/SSE/MMX in an order depending on TARGET_SSE_MATH.
   (Declarations of `i` and `pos` are elided in this listing.)  */
26787 x86_order_regs_for_local_alloc (void)
26792 /* First allocate the local general purpose registers. */
26793 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26794 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26795 reg_alloc_order [pos++] = i;
26797 /* Global general purpose registers. */
26798 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26799 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26800 reg_alloc_order [pos++] = i;
26802 /* x87 registers come first in case we are doing FP math
26804 if (!TARGET_SSE_MATH)
26805 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26806 reg_alloc_order [pos++] = i;
26808 /* SSE registers. */
26809 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26810 reg_alloc_order [pos++] = i;
26811 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26812 reg_alloc_order [pos++] = i;
26814 /* x87 registers. */
26815 if (TARGET_SSE_MATH)
26816 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26817 reg_alloc_order [pos++] = i;
26819 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26820 reg_alloc_order [pos++] = i;
26822 /* Initialize the rest of array as we do not allocate some registers
26824 while (pos < FIRST_PSEUDO_REGISTER)
26825 reg_alloc_order [pos++] = 0;
26828 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26829 struct attribute_spec.handler. */
/* Rejects the attribute on non-function nodes, on 32-bit targets
   (the TARGET_64BIT guard before line 26847 is elided), and when the
   opposite ABI attribute is already present.  */
26831 ix86_handle_abi_attribute (tree *node, tree name,
26832 tree args ATTRIBUTE_UNUSED,
26833 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26835 if (TREE_CODE (*node) != FUNCTION_TYPE
26836 && TREE_CODE (*node) != METHOD_TYPE
26837 && TREE_CODE (*node) != FIELD_DECL
26838 && TREE_CODE (*node) != TYPE_DECL)
26840 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26841 IDENTIFIER_POINTER (name));
26842 *no_add_attrs = true;
26847 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26848 IDENTIFIER_POINTER (name));
26849 *no_add_attrs = true;
26853 /* Can combine regparm with all attributes but fastcall. */
26854 if (is_attribute_p ("ms_abi", name))
26856 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26858 error ("ms_abi and sysv_abi attributes are not compatible");
26863 else if (is_attribute_p ("sysv_abi", name))
26865 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26867 error ("ms_abi and sysv_abi attributes are not compatible");
26876 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26877 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types and refuses combining
   ms_struct with gcc_struct (declaration of `type` is elided).  */
26879 ix86_handle_struct_attribute (tree *node, tree name,
26880 tree args ATTRIBUTE_UNUSED,
26881 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26884 if (DECL_P (*node))
26886 if (TREE_CODE (*node) == TYPE_DECL)
26887 type = &TREE_TYPE (*node);
26892 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26893 || TREE_CODE (*type) == UNION_TYPE)))
26895 warning (OPT_Wattributes, "%qs attribute ignored",
26896 IDENTIFIER_POINTER (name));
26897 *no_add_attrs = true;
26900 else if ((is_attribute_p ("ms_struct", name)
26901 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26902 || ((is_attribute_p ("gcc_struct", name)
26903 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26905 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26906 IDENTIFIER_POINTER (name));
26907 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bitfield layout when it is the
   target default and not overridden by gcc_struct, or when the type is
   explicitly marked ms_struct.  */
26914 ix86_ms_bitfield_layout_p (const_tree record_type)
26916 return (TARGET_MS_BITFIELD_LAYOUT &&
26917 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26918 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26921 /* Returns an expression indicating where the this parameter is
26922 located on entry to the FUNCTION. */
/* 64-bit: `this` is the first (or, for aggregate returns, second) integer
   argument register of the function's ABI.  32-bit: a regparm/fastcall
   register when available, otherwise a stack slot.  (The TARGET_64BIT
   guard and regno declarations are elided in this listing.)  */
26925 x86_this_parameter (tree function)
26927 tree type = TREE_TYPE (function);
/* aggr: true when the return value is passed via hidden pointer, which
   shifts `this` to the next parameter slot.  */
26928 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26933 const int *parm_regs;
26935 if (ix86_function_type_abi (type) == MS_ABI)
26936 parm_regs = x86_64_ms_abi_int_parameter_registers;
26938 parm_regs = x86_64_int_parameter_registers;
26939 return gen_rtx_REG (DImode, parm_regs[aggr]);
26942 nregs = ix86_function_regparm (type, function);
26944 if (nregs > 0 && !stdarg_p (type))
26948 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26949 regno = aggr ? DX_REG : CX_REG;
26957 return gen_rtx_MEM (SImode,
26958 plus_constant (stack_pointer_rtx, 4));
26961 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: `this` lives on the stack just above the return
   address (8 when an aggregate-return pointer precedes it).  */
26964 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26967 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns whether a free scratch register exists for the thunk body;
   the `return true/false;` lines are elided in this listing.  */
26970 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26971 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26972 HOST_WIDE_INT vcall_offset, const_tree function)
26974 /* 64-bit can handle anything. */
26978 /* For 32-bit, everything's fine if we have one free register. */
26979 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26982 /* Need a free register for vcall_offset. */
26986 /* Need a free register for GOT references. */
26987 if (flag_pic && !(*targetm.binds_local_p) (function))
26990 /* Otherwise ok. */
26994 /* Output the assembler code for a thunk function. THUNK_DECL is the
26995 declaration for the thunk function itself, FUNCTION is the decl for
26996 the target function. DELTA is an immediate constant offset to be
26997 added to THIS. If VCALL_OFFSET is nonzero, the word at
26998 *(*this + vcall_offset) should be added to THIS. */
/* TARGET_ASM_OUTPUT_MI_THUNK hook: emits textual assembly directly via
   output_asm_insn, bypassing RTL.  (Declarations of xops[] and tmp, and
   many TARGET_64BIT guards, are elided in this listing.)  */
27001 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27002 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27003 HOST_WIDE_INT vcall_offset, tree function)
27006 rtx this_param = x86_this_parameter (function);
27009 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27010 pull it in now and let DELTA benefit. */
27011 if (REG_P (this_param))
27012 this_reg = this_param;
27013 else if (vcall_offset)
27015 /* Put the this parameter into %eax. */
27016 xops[0] = this_param;
27017 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27018 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27021 this_reg = NULL_RTX;
27023 /* Adjust the this parameter by a fixed constant. */
27026 xops[0] = GEN_INT (delta);
27027 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: constants outside the sign-extended 32-bit range must be
   loaded into a scratch (%r10) before the add.  */
27030 if (!x86_64_general_operand (xops[0], DImode))
27032 tmp = gen_rtx_REG (DImode, R10_REG);
27034 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27036 xops[1] = this_param;
27038 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27041 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27044 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch for the vtable load: %r10 on 64-bit; %ecx (or %eax under
   fastcall, where %ecx carries `this`) on 32-bit.  */
27048 tmp = gen_rtx_REG (DImode, R10_REG);
27051 int tmp_regno = CX_REG;
27052 if (lookup_attribute ("fastcall",
27053 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27054 tmp_regno = AX_REG;
27055 tmp = gen_rtx_REG (SImode, tmp_regno);
27058 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27060 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27062 /* Adjust the this parameter. */
27063 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27064 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27066 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27067 xops[0] = GEN_INT (vcall_offset);
27069 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27070 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27072 xops[1] = this_reg;
27073 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27076 /* If necessary, drop THIS back to its stack slot. */
27077 if (this_reg && this_reg != this_param)
27079 xops[0] = this_reg;
27080 xops[1] = this_param;
27081 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27084 xops[0] = XEXP (DECL_RTL (function), 0);
/* Tail-jump to the real function: direct jmp when non-PIC or local;
   otherwise through the GOT (64-bit) or a PIC/Mach-O stub (32-bit).  */
27087 if (!flag_pic || (*targetm.binds_local_p) (function))
27088 output_asm_insn ("jmp\t%P0", xops);
27089 /* All thunks should be in the same object as their target,
27090 and thus binds_local_p should be true. */
27091 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27092 gcc_unreachable ();
27095 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27096 tmp = gen_rtx_CONST (Pmode, tmp);
27097 tmp = gen_rtx_MEM (QImode, tmp);
27099 output_asm_insn ("jmp\t%A0", xops);
27104 if (!flag_pic || (*targetm.binds_local_p) (function))
27105 output_asm_insn ("jmp\t%P0", xops);
27110 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27111 tmp = (gen_rtx_SYMBOL_REF
27113 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27114 tmp = gen_rtx_MEM (QImode, tmp);
27116 output_asm_insn ("jmp\t%0", xops);
27119 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC path: materialize the GOT pointer in %ecx and jump
   through the function's GOT slot.  */
27121 tmp = gen_rtx_REG (SImode, CX_REG);
27122 output_set_got (tmp, NULL_RTX);
27125 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27126 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit standard prologue directives plus the
   target-conditional .version/__fltused/.intel_syntax lines.  */
27132 x86_file_start (void)
27134 default_file_start ();
/* Darwin-only (guard elided in this listing).  */
27136 darwin_file_start ();
27138 if (X86_FILE_START_VERSION_DIRECTIVE)
27139 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27140 if (X86_FILE_START_FLTUSED)
27141 fputs ("\t.global\t__fltused\n", asm_out_file);
27142 if (ix86_asm_dialect == ASM_INTEL)
27143 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/long-long-ish fields at 32 bits for ABI
   compatibility.  (The early `return computed;` branches are elided.)  */
27147 x86_field_alignment (tree field, int computed)
27149 enum machine_mode mode;
27150 tree type = TREE_TYPE (field);
27152 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27154 mode = TYPE_MODE (strip_array_types (type));
27155 if (mode == DFmode || mode == DCmode
27156 || GET_MODE_CLASS (mode) == MODE_INT
27157 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27158 return MIN (32, computed);
27162 /* Output assembler code to FILE to increment profiler label # LABELNO
27163 for profiling a function entry. */
/* Emits the mcount call sequence; the TARGET_64BIT / flag_pic branch
   structure around the three variants is elided in this listing.  */
27165 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit variant.  */
27169 #ifndef NO_PROFILE_COUNTERS
27170 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27173 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27174 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27176 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC variant.  */
27180 #ifndef NO_PROFILE_COUNTERS
27181 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27182 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27184 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC variant.  */
27188 #ifndef NO_PROFILE_COUNTERS
27189 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27190 PROFILE_COUNT_REGISTER);
27192 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27196 /* We don't have exact information about the insn sizes, but we may assume
27197 quite safely that we are informed about all 1 byte insns and memory
27198 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded size;
   the numeric `return` lines for each case are elided in this listing.  */
27202 min_insn_size (rtx insn)
27206 if (!INSN_P (insn) || !active_insn_p (insn))
27209 /* Discard alignments we've emit and jump instructions. */
27210 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27211 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27214 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27215 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
27218 /* Important case - calls are always 5 bytes.
27219 It is common to have many calls in the row. */
27221 && symbolic_reference_mentioned_p (PATTERN (insn))
27222 && !SIBLING_CALL_P (insn))
27224 if (get_attr_length (insn) <= 1)
27227 /* For normal instructions we may rely on the sizes of addresses
27228 and the presence of symbol to require 4 bytes of encoding.
27229 This is not the case for jumps where references are PC relative. */
27230 if (!JUMP_P (insn))
27232 l = get_attr_length_address (insn);
27233 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27242 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pads code so no 16-byte window contains 4 jumps
   (K8 branch-predictor limitation).  (Declaration of `isjump` and some
   loop-control lines are elided in this listing.)  */
27246 ix86_avoid_jump_misspredicts (void)
27248 rtx insn, start = get_insns ();
27249 int nbytes = 0, njumps = 0;
27252 /* Look for all minimal intervals of instructions containing 4 jumps.
27253 The intervals are bounded by START and INSN. NBYTES is the total
27254 size of instructions in the interval including INSN and not including
27255 START. When the NBYTES is smaller than 16 bytes, it is possible
27256 that the end of START and INSN ends up in the same 16byte page.
27258 The smallest offset in the page INSN can start is the case where START
27259 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27260 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
27262 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27265 nbytes += min_insn_size (insn);
27267 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27268 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps (dispatch tables excluded).  */
27270 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27271 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds more than 3 jumps.  */
27279 start = NEXT_INSN (start);
27280 if ((JUMP_P (start)
27281 && GET_CODE (PATTERN (start)) != ADDR_VEC
27282 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27284 njumps--, isjump = 1;
27287 nbytes -= min_insn_size (start);
27289 gcc_assert (njumps >= 0);
27291 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27292 INSN_UID (start), INSN_UID (insn), nbytes);
27294 if (njumps == 3 && isjump && nbytes < 16)
27296 int padsize = 15 - nbytes + min_insn_size (insn);
27299 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27300 INSN_UID (insn), padsize);
27301 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27306 /* AMD Athlon works faster
27307 when RET is not destination of conditional jump or directly preceded
27308 by other jump instruction. We avoid the penalty by inserting NOP just
27309 before the RET instructions in such cases. */
/* Walks every edge into the exit block; `replace` decides whether the
   plain ret becomes the long-form return_internal_long.  (Declarations
   of e/ei/prev and several flow-control lines are elided.)  */
27311 ix86_pad_returns (void)
27316 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27318 basic_block bb = e->src;
27319 rtx ret = BB_END (bb);
27321 bool replace = false;
27323 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27324 || optimize_bb_for_size_p (bb))
27326 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27327 if (active_insn_p (prev) || LABEL_P (prev))
27329 if (prev && LABEL_P (prev))
/* Label directly before ret: replace if any predecessor edge is a
   taken (non-fallthru) branch into it.  */
27334 FOR_EACH_EDGE (e, ei, bb->preds)
27335 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27336 && !(e->flags & EDGE_FALLTHRU))
27341 prev = prev_active_insn (ret);
27343 && ((JUMP_P (prev) && any_condjump_p (prev))
27346 /* Empty functions get branch mispredict even when the jump destination
27347 is not visible to us. */
27348 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27353 emit_insn_before (gen_return_internal_long (), ret);
27359 /* Implement machine specific optimizations. We implement padding of returns
27360 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function signature line is elided in this listing;
   presumably this is the TARGET_MACHINE_DEPENDENT_REORG hook (ix86_reorg)
   — confirm against the full source.  */
27364 if (TARGET_PAD_RETURNS && optimize
27365 && optimize_function_for_speed_p (cfun))
27366 ix86_pad_returns ();
27367 if (TARGET_FOUR_JUMP_LIMIT && optimize
27368 && optimize_function_for_speed_p (cfun))
27369 ix86_avoid_jump_misspredicts ();
27372 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached recog operands for a GPR above %ebx (whose QImode
   access requires a REX prefix); the return statements are elided.  */
27375 x86_extended_QIreg_mentioned_p (rtx insn)
27378 extract_insn_cached (insn);
27379 for (i = 0; i < recog_data.n_operands; i++)
27380 if (REG_P (recog_data.operand[i])
27381 && REGNO (recog_data.operand[i]) > BX_REG)
27386 /* Return nonzero when P points to register encoded via REX prefix.
27387 Called via for_each_rtx. */
/* The REG_P(*p) guard before line 27394 is elided in this listing.  */
27389 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27391 unsigned int regno;
27394 regno = REGNO (*p);
27395 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27398 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern (or a bare rtx) with extended_reg_mentioned_1.  */
27401 x86_extended_reg_mentioned_p (rtx insn)
27403 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27404 extended_reg_mentioned_1, NULL);
27407 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27408 optabs would emit if we didn't have TFmode patterns. */
/* Standard unsigned->float trick: non-negative inputs convert directly;
   negative (MSB-set) inputs are halved with the low bit ORed back in,
   converted, then doubled.  (The `out = operands[0];` line is elided.)  */
27411 x86_emit_floatuns (rtx operands[2])
27413 rtx neglab, donelab, i0, i1, f0, in, out;
27414 enum machine_mode mode, inmode;
27416 inmode = GET_MODE (operands[1]);
27417 gcc_assert (inmode == SImode || inmode == DImode);
27420 in = force_reg (inmode, operands[1]);
27421 mode = GET_MODE (out);
27422 neglab = gen_label_rtx ();
27423 donelab = gen_label_rtx ();
27424 f0 = gen_reg_rtx (mode);
27426 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27428 expand_float (out, in, 0);
27430 emit_jump_insn (gen_jump (donelab));
27433 emit_label (neglab);
/* i0 = in >> 1; i1 = in & 1; i0 |= i1 — preserves rounding parity.  */
27435 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27437 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27439 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27441 expand_float (f0, i0, 0);
27443 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27445 emit_label (donelab);
27448 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27449 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the enclosing `switch (mode)` and its `case`/`return`
   lines are elided in this listing; each group below handles one family
   of vector modes.  */
27452 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27453 rtx target, rtx val)
27455 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: modes with a native vec_duplicate pattern.  */
27470 val = force_reg (GET_MODE_INNER (mode), val);
27471 x = gen_rtx_VEC_DUPLICATE (mode, val);
27472 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style case via pshufw (SSE or 3DNow!-A only).  */
27478 if (TARGET_SSE || TARGET_3DNOW_A)
27480 val = gen_lowpart (SImode, val);
27481 x = gen_rtx_TRUNCATE (HImode, val);
27482 x = gen_rtx_VEC_DUPLICATE (mode, x);
27483 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HI case without SSE2 broadcast: build through SImode + shuffles.  */
27505 /* Extend HImode to SImode using a paradoxical SUBREG. */
27506 tmp1 = gen_reg_rtx (SImode);
27507 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27508 /* Insert the SImode value as low element of V4SImode vector. */
27509 tmp2 = gen_reg_rtx (V4SImode);
27510 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27511 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27512 CONST0_RTX (V4SImode),
27514 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27515 /* Cast the V4SImode vector back to a V8HImode vector. */
27516 tmp1 = gen_reg_rtx (V8HImode);
27517 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27518 /* Duplicate the low short through the whole low SImode word. */
27519 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27520 /* Cast the V8HImode vector back to a V4SImode vector. */
27521 tmp2 = gen_reg_rtx (V4SImode);
27522 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27523 /* Replicate the low element of the V4SImode vector. */
27524 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27525 /* Cast the V2SImode back to V8HImode, and store in target. */
27526 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QI case: same idea with two punpcklbw steps.  */
27537 /* Extend QImode to SImode using a paradoxical SUBREG. */
27538 tmp1 = gen_reg_rtx (SImode);
27539 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27540 /* Insert the SImode value as low element of V4SImode vector. */
27541 tmp2 = gen_reg_rtx (V4SImode);
27542 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27543 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27544 CONST0_RTX (V4SImode),
27546 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27547 /* Cast the V4SImode vector back to a V16QImode vector. */
27548 tmp1 = gen_reg_rtx (V16QImode);
27549 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27550 /* Duplicate the low byte through the whole low SImode word. */
27551 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27552 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27553 /* Cast the V16QImode vector back to a V4SImode vector. */
27554 tmp2 = gen_reg_rtx (V4SImode);
27555 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27556 /* Replicate the low element of the V4SImode vector. */
27557 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27558 /* Cast the V2SImode back to V16QImode, and store in target. */
27559 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: widen the scalar (val | val << bits) and recurse in the
   wider vector mode.  */
27567 /* Replicate the value once into the next wider mode and recurse. */
27568 val = convert_modes (wsmode, smode, val, true);
27569 x = expand_simple_binop (wsmode, ASHIFT, val,
27570 GEN_INT (GET_MODE_BITSIZE (smode)),
27571 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27572 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27574 x = gen_reg_rtx (wvmode);
27575 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27576 gcc_unreachable ();
27577 emit_move_insn (target, gen_lowpart (mode, x));
/* AVX 256-bit case: duplicate into the half-width mode, then concat.  */
27600 rtx tmp = gen_reg_rtx (hmode);
27601 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27602 emit_insn (gen_rtx_SET (VOIDmode, target,
27603 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27612 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27613 whose ONE_VAR element is VAR, and other elements are zero.  Return true
27617 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27618 rtx target, rtx var, int one_var)
27620 enum machine_mode vsimode;
/* use_vector_set is decided per-mode below (the selecting switch is part
   of the elided lines); each assignment gates on the ISA that provides a
   single-element insert for that mode.  */
27623 bool use_vector_set = false;
27628 /* For SSE4.1, we normally use vector set.  But if the second
27629 element is zero and inter-unit moves are OK, we use movq
27631 use_vector_set = (TARGET_64BIT
27633 && !(TARGET_INTER_UNIT_MOVES
27639 use_vector_set = TARGET_SSE4_1;
27642 use_vector_set = TARGET_SSE2;
27645 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27652 use_vector_set = TARGET_AVX;
27655 /* Use ix86_expand_vector_set in 64bit mode only.  */
27656 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Easy case: zero the whole vector, then insert VAR at ONE_VAR.  */
27662 if (use_vector_set)
27664 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27665 var = force_reg (GET_MODE_INNER (mode), var);
27666 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Presumably the two-element-vector path (VEC_CONCAT of VAR with a zero
   scalar) — the guarding case label is elided; confirm against full file.  */
27682 var = force_reg (GET_MODE_INNER (mode), var);
27683 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27684 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4SImode/V4SFmode path: build VAR in element 0 via VEC_MERGE with zero,
   then shuffle it into position ONE_VAR.  A hard register target cannot be
   used as the intermediate, hence the fresh pseudo.  */
27689 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27690 new_target = gen_reg_rtx (mode);
27692 new_target = target;
27693 var = force_reg (GET_MODE_INNER (mode), var);
27694 x = gen_rtx_VEC_DUPLICATE (mode, var);
27695 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27696 emit_insn (gen_rtx_SET (VOIDmode, new_target, x))
27699 /* We need to shuffle the value to the correct position, so
27700 create a new pseudo to store the intermediate result.  */
27702 /* With SSE2, we can use the integer shuffle insns.  */
27703 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd selector: element ONE_VAR takes lane 0 (where VAR sits), every
   other lane takes a zero lane (the GEN_INT for index 0 is elided here).  */
27705 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27707 GEN_INT (one_var == 1 ? 0 : 1),
27708 GEN_INT (one_var == 2 ? 0 : 1),
27709 GEN_INT (one_var == 3 ? 0 : 1)));
27710 if (target != new_target)
27711 emit_move_insn (target, new_target);
27715 /* Otherwise convert the intermediate result to V4SFmode and
27716 use the SSE1 shuffle instructions.  */
27717 if (mode != V4SFmode)
27719 tmp = gen_reg_rtx (V4SFmode);
27720 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: low two indices select from the first operand, the +4 indices
   select from the second.  */
27725 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27727 GEN_INT (one_var == 1 ? 0 : 1),
27728 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27729 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27731 if (mode != V4SFmode)
27732 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27733 else if (tmp != target)
27734 emit_move_insn (target, tmp);
27736 else if (target != new_target)
27737 emit_move_insn (target, new_target);
/* Narrow-element fallback: widen to an SImode-element vector and recurse.  */
27742 vsimode = V4SImode;
27748 vsimode = V2SImode;
27754 /* Zero extend the variable element to SImode and recurse.  */
27755 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27757 x = gen_reg_rtx (vsimode);
27758 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27760 gcc_unreachable ();
27762 emit_move_insn (target, gen_lowpart (mode, x));
27770 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27771 consisting of the values in VALS.  It is known that all elements
27772 except ONE_VAR are constants.  Return true if successful.  */
27775 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27776 rtx target, rtx vals, int one_var)
27778 rtx var = XVECEXP (vals, 0, one_var);
27779 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the variable slot zeroed, so the
   constant part can be loaded from the pool in one move.  */
27782 const_vec = copy_rtx (vals);
27783 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27784 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27792 /* For the two element vectors, it's just as easy to use
27793 the general case.  */
27797 /* Use ix86_expand_vector_set in 64bit mode only.  */
27820 /* There's no way to set one QImode entry easily.  Combine
27821 the variable value with its adjacent constant value, and
27822 promote to an HImode set.  */
/* one_var ^ 1 is the partner byte within the same HImode half.  */
27823 x = XVECEXP (vals, 0, one_var ^ 1);
/* First arm (elided condition, presumably one_var odd): VAR becomes the
   high byte of the pair; otherwise the constant does.  Confirm against
   the unexcerpted file.  */
27826 var = convert_modes (HImode, QImode, var, true);
27827 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27829 x = GEN_INT (INTVAL (x) & 0xff);
27833 var = convert_modes (HImode, QImode, var, true);
27834 x = gen_int_mode (INTVAL (x) << 8, HImode);
27836 if (x != const0_rtx)
27837 var = expand_simple_binop (HImode, IOR, var, x, var,
27838 1, OPTAB_LIB_WIDEN);
/* Recurse at HImode granularity: one_var >> 1 is the HImode slot that
   holds the combined byte pair.  */
27840 x = gen_reg_rtx (wmode);
27841 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27842 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27844 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant vector, then overwrite the one
   variable element.  */
27851 emit_move_insn (target, const_vec);
27852 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27856 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27857 concatenate to handle the most general case: all values variable,
27858 and none identical.  */
27861 ix86_expand_vector_init_concat (enum machine_mode mode,
27862 rtx target, rtx *ops, int n)
27864 enum machine_mode cmode, hmode = VOIDmode;
/* first[] holds pairwise-combined half-width vectors; second[] holds the
   next level of combination when N >= 8.  */
27865 rtx first[8], second[4];
27905 gcc_unreachable ();
/* n == 2 case (case label elided): force both halves into registers and
   emit a single VEC_CONCAT.  */
27908 if (!register_operand (ops[1], cmode))
27909 ops[1] = force_reg (cmode, ops[1]);
27910 if (!register_operand (ops[0], cmode))
27911 ops[0] = force_reg (cmode, ops[0]);
27912 emit_insn (gen_rtx_SET (VOIDmode, target,
27913 gen_rtx_VEC_CONCAT (mode, ops[0],
27933 gcc_unreachable ();
27949 gcc_unreachable ();
27954 /* FIXME: We process inputs backward to help RA.  PR 36222.  */
/* Pairwise-combine adjacent scalars into half-width vectors, walking
   from the end of OPS toward the front.  */
27957 for (; i > 0; i -= 2, j--)
27959 first[j] = gen_reg_rtx (cmode);
27960 v = gen_rtvec (2, ops[i - 1], ops[i]);
27961 ix86_expand_vector_init (false, first[j],
27962 gen_rtx_PARALLEL (cmode, v));
/* For n >= 8 (guard elided), combine FIRST pairwise once more into
   SECOND before the final recursive concat.  */
27968 gcc_assert (hmode != VOIDmode);
27969 for (i = j = 0; i < n; i += 2, j++)
27971 second[j] = gen_reg_rtx (hmode);
27972 ix86_expand_vector_init_concat (hmode, second [j],
27976 ix86_expand_vector_init_concat (mode, target, second, n);
27979 ix86_expand_vector_init_concat (mode, target, first, n);
27983 gcc_unreachable ();
27987 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27988 interleave to handle the most general case: all values variable,
27989 and none identical.  */
27992 ix86_expand_vector_init_interleave (enum machine_mode mode,
27993 rtx target, rtx *ops, int n)
27995 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Mode-specific generators: insert an even element, then the two (or
   three) widening interleave-low steps.  */
27998 rtx (*gen_load_even) (rtx, rtx, rtx);
27999 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28000 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration (case label elided): HI elements interleave
   through V4SI then V2DI.  */
28005 gen_load_even = gen_vec_setv8hi;
28006 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28007 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28008 inner_mode = HImode;
28009 first_imode = V4SImode;
28010 second_imode = V2DImode;
28011 third_imode = VOIDmode;
/* V16QImode configuration: QI elements need one extra interleave level.  */
28014 gen_load_even = gen_vec_setv16qi;
28015 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28016 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28017 inner_mode = QImode;
28018 first_imode = V8HImode;
28019 second_imode = V4SImode;
28020 third_imode = V2DImode;
28023 gcc_unreachable ();
/* Stage 1: for each pair (odd, even) of scalars, build a vector holding
   both in the low lanes.  */
28026 for (i = 0; i < n; i++)
28028 /* Extend the odd elment to SImode using a paradoxical SUBREG.  */
28029 op0 = gen_reg_rtx (SImode);
28030 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28032 /* Insert the SImode value as low element of V4SImode vector.  */
28033 op1 = gen_reg_rtx (V4SImode);
28034 op0 = gen_rtx_VEC_MERGE (V4SImode,
28035 gen_rtx_VEC_DUPLICATE (V4SImode,
28037 CONST0_RTX (V4SImode),
28039 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28041 /* Cast the V4SImode vector back to a vector in orignal mode.  */
28042 op0 = gen_reg_rtx (mode);
28043 emit_move_insn (op0, gen_lowpart (mode, op1));
28045 /* Load even elements into the second positon.  */
28046 emit_insn ((*gen_load_even) (op0,
28047 force_reg (inner_mode,
28051 /* Cast vector to FIRST_IMODE vector.  */
28052 ops[i] = gen_reg_rtx (first_imode);
28053 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28056 /* Interleave low FIRST_IMODE vectors.  */
28057 for (i = j = 0; i < n; i += 2, j++)
28059 op0 = gen_reg_rtx (first_imode);
28060 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28062 /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
28063 ops[j] = gen_reg_rtx (second_imode);
28064 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28067 /* Interleave low SECOND_IMODE vectors.  */
28068 switch (second_imode)
/* V4SImode case (label elided): one more interleave level, then fall
   through to the final V2DImode combine below.  */
28071 for (i = j = 0; i < n / 2; i += 2, j++)
28073 op0 = gen_reg_rtx (second_imode);
28074 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28077 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28079 ops[j] = gen_reg_rtx (third_imode);
28080 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28082 second_imode = V2DImode;
28083 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final combine: interleave the two remaining vectors and write the
   result to TARGET in the original mode.  */
28087 op0 = gen_reg_rtx (second_imode);
28088 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28091 /* Cast the SECOND_IMODE vector back to a vector on original
28093 emit_insn (gen_rtx_SET (VOIDmode, target,
28094 gen_lowpart (mode, op0)));
28098 gcc_unreachable ();
28102 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
28103 all values variable, and none identical.  */
28106 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28107 rtx target, rtx vals)
28109 rtx ops[32], op0, op1;
28110 enum machine_mode half_mode = VOIDmode;
28117 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes (case labels elided): build by pairwise
   concatenation.  */
28129 n = GET_MODE_NUNITS (mode);
28130 for (i = 0; i < n; i++)
28131 ops[i] = XVECEXP (vals, 0, i);
28132 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: build each 128-bit half by interleave, then
   concatenate the halves.  */
28136 half_mode = V16QImode;
28140 half_mode = V8HImode;
28144 n = GET_MODE_NUNITS (mode);
28145 for (i = 0; i < n; i++)
28146 ops[i] = XVECEXP (vals, 0, i);
28147 op0 = gen_reg_rtx (half_mode);
28148 op1 = gen_reg_rtx (half_mode);
28149 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28151 ix86_expand_vector_init_interleave (half_mode, op1,
28152 &ops [n >> 1], n >> 2);
28153 emit_insn (gen_rtx_SET (VOIDmode, target,
28154 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 128-bit QI/HI vectors: interleave directly, but only when element
   inserts (SSE4.1) and GPR->SSE moves are available.  */
28158 if (!TARGET_SSE4_1)
28166 /* Don't use ix86_expand_vector_init_interleave if we can't
28167 move from GPR to SSE register directly.  */
28168 if (!TARGET_INTER_UNIT_MOVES)
28171 n = GET_MODE_NUNITS (mode);
28172 for (i = 0; i < n; i++)
28173 ops[i] = XVECEXP (vals, 0, i);
28174 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28182 gcc_unreachable ();
/* Fallback: assemble the vector in integer words, then move the words
   into the vector register.  */
28186 int i, j, n_elts, n_words, n_elt_per_word;
28187 enum machine_mode inner_mode;
28188 rtx words[4], shift;
28190 inner_mode = GET_MODE_INNER (mode);
28191 n_elts = GET_MODE_NUNITS (mode);
28192 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28193 n_elt_per_word = n_elts / n_words;
28194 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28196 for (i = 0; i < n_words; ++i)
28198 rtx word = NULL_RTX;
/* Pack the elements of word I, highest element first, by repeated
   shift-and-or.  */
28200 for (j = 0; j < n_elt_per_word; ++j)
28202 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28203 elt = convert_modes (word_mode, inner_mode, elt, true);
28209 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28210 word, 1, OPTAB_LIB_WIDEN);
28211 word = expand_simple_binop (word_mode, IOR, word, elt,
28212 word, 1, OPTAB_LIB_WIDEN);
28220 emit_move_insn (target, gen_lowpart (mode, words[0]));
28221 else if (n_words == 2)
/* The clobber tells the RTL optimizers the full register is written
   before the two partial word stores.  */
28223 rtx tmp = gen_reg_rtx (mode);
28224 emit_clobber (tmp);
28225 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28226 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28227 emit_move_insn (target, tmp);
28229 else if (n_words == 4)
28231 rtx tmp = gen_reg_rtx (V4SImode);
28232 gcc_assert (word_mode == SImode);
28233 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28234 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28235 emit_move_insn (target, gen_lowpart (mode, tmp));
28238 gcc_unreachable ();
28242 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28243 instructions unless MMX_OK is true.  */
28246 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28248 enum machine_mode mode = GET_MODE (target);
28249 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28250 int n_elts = GET_MODE_NUNITS (mode);
28251 int n_var = 0, one_var = -1;
28252 bool all_same = true, all_const_zero = true;
/* Classify VALS: count the non-constant elements (remembering the last
   one in ONE_VAR) and note whether everything is zero / identical.  */
28256 for (i = 0; i < n_elts; ++i)
28258 x = XVECEXP (vals, 0, i);
28259 if (!(CONST_INT_P (x)
28260 || GET_CODE (x) == CONST_DOUBLE
28261 || GET_CODE (x) == CONST_FIXED))
28262 n_var++, one_var = i;
28263 else if (x != CONST0_RTX (inner_mode))
28264 all_const_zero = false;
28265 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28269 /* Constants are best loaded from the constant pool.  */
28272 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28276 /* If all values are identical, broadcast the value.  */
28278 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28279 XVECEXP (vals, 0, 0)))
28282 /* Values where only one field is non-constant are best loaded from
28283 the pool and overwritten via move later.  */
28287 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28288 XVECEXP (vals, 0, one_var),
28292 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general construction.  */
28296 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store VAL into element ELT of vector TARGET.  MMX_OK permits MMX
   instructions.  Falls back to a stack round-trip when no in-register
   sequence applies.  */
28300 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28302 enum machine_mode mode = GET_MODE (target);
28303 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28304 enum machine_mode half_mode;
28305 bool use_vec_merge = false;
/* AVX 256-bit handling: tables of half-extract / half-insert expanders,
   indexed by [mode][hi/lo half].  */
28307 static rtx (*gen_extract[6][2]) (rtx, rtx)
28309 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28310 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28311 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28312 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28313 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28314 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28316 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28318 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28319 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28320 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28321 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28322 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28323 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element mode path (case label elided): extract the element we are
   keeping, then VEC_CONCAT it with VAL in the right order.  */
28333 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28334 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28336 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28338 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28339 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28345 use_vec_merge = TARGET_SSE4_1;
28353 /* For the two element vectors, we implement a VEC_CONCAT with
28354 the extraction of the other element.  */
28356 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28357 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28360 op0 = val, op1 = tmp;
28362 op0 = tmp, op1 = val;
28364 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28365 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28370 use_vec_merge = TARGET_SSE4_1;
28377 use_vec_merge = true;
/* V4SFmode without SSE4.1: the three shufps sequences below place VAL
   at elt 1, 2 or 3 respectively (elt 0 is the VEC_MERGE case).  The
   trailing comments track the lane contents.  */
28381 /* tmp = target = A B C D */
28382 tmp = copy_to_reg (target);
28383 /* target = A A B B */
28384 emit_insn (gen_sse_unpcklps (target, target, target));
28385 /* target = X A B B */
28386 ix86_expand_vector_set (false, target, val, 0);
28387 /* target = A X C D */
28388 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28389 GEN_INT (1), GEN_INT (0),
28390 GEN_INT (2+4), GEN_INT (3+4)));
28394 /* tmp = target = A B C D */
28395 tmp = copy_to_reg (target);
28396 /* tmp = X B C D */
28397 ix86_expand_vector_set (false, tmp, val, 0);
28398 /* target = A B X D */
28399 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28400 GEN_INT (0), GEN_INT (1),
28401 GEN_INT (0+4), GEN_INT (3+4)));
28405 /* tmp = target = A B C D */
28406 tmp = copy_to_reg (target);
28407 /* tmp = X B C D */
28408 ix86_expand_vector_set (false, tmp, val, 0);
28409 /* target = A B X D */
28410 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28411 GEN_INT (0), GEN_INT (1),
28412 GEN_INT (2+4), GEN_INT (0+4)));
28416 gcc_unreachable ();
28421 use_vec_merge = TARGET_SSE4_1;
28425 /* Element 0 handled by vec_merge below.  */
28428 use_vec_merge = true;
28434 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28435 store into element 0, then shuffle them back.  */
28439 order[0] = GEN_INT (elt);
28440 order[1] = const1_rtx;
28441 order[2] = const2_rtx;
28442 order[3] = GEN_INT (3);
28443 order[elt] = const0_rtx;
28445 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28446 order[1], order[2], order[3]));
28448 ix86_expand_vector_set (false, target, val, 0);
/* Same permutation again: it is an involution (swaps 0 and ELT), so
   reapplying it restores the original element order.  */
28450 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28451 order[1], order[2], order[3]));
28455 /* For SSE1, we have to reuse the V4SF code.  */
28456 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28457 gen_lowpart (SFmode, val), elt);
28462 use_vec_merge = TARGET_SSE2;
28465 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28469 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the matching half mode; the second statement in
   each pair (selecting the gen_extract/gen_insert row) is elided.  */
28476 half_mode = V16QImode;
28482 half_mode = V8HImode;
28488 half_mode = V4SImode;
28494 half_mode = V2DImode;
28500 half_mode = V4SFmode;
28506 half_mode = V2DFmode;
28512 /* Compute offset.  */
28516 gcc_assert (i <= 1);
28518 /* Extract the half.  */
28519 tmp = gen_reg_rtx (half_mode);
28520 emit_insn ((*gen_extract[j][i]) (tmp, target));
28522 /* Put val in tmp at elt.  */
28523 ix86_expand_vector_set (false, tmp, val, elt);
/* Re-insert the modified half.  */
28526 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Shared VEC_MERGE emission for all use_vec_merge paths: duplicate VAL
   and merge only the lane selected by 1 << elt.  */
28535 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28536 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28537 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack slot, store the element there, reload.  */
28541 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28543 emit_move_insn (mem, target);
28545 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28546 emit_move_insn (tmp, val);
28548 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into TARGET.  MMX_OK permits MMX
   instructions; a stack round-trip is the last-resort fallback.  */
28553 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28555 enum machine_mode mode = GET_MODE (vec);
28556 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28557 bool use_vec_extr = false;
28570 use_vec_extr = true;
28574 use_vec_extr = TARGET_SSE4_1;
/* V4SFmode without SSE4.1: shuffle the wanted element into lane 0
   first.  The elt==2/3 alternative (unpckhps) follows.  */
28586 tmp = gen_reg_rtx (mode);
28587 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28588 GEN_INT (elt), GEN_INT (elt),
28589 GEN_INT (elt+4), GEN_INT (elt+4)));
28593 tmp = gen_reg_rtx (mode);
28594 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28598 gcc_unreachable ();
28601 use_vec_extr = true;
28606 use_vec_extr = TARGET_SSE4_1;
/* V4SImode analogue: pshufd or punpckhdq to move ELT to lane 0.  */
28620 tmp = gen_reg_rtx (mode);
28621 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28622 GEN_INT (elt), GEN_INT (elt),
28623 GEN_INT (elt), GEN_INT (elt)));
28627 tmp = gen_reg_rtx (mode);
28628 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28632 gcc_unreachable ();
28635 use_vec_extr = true;
28640 /* For SSE1, we have to reuse the V4SF code.  */
28641 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28642 gen_lowpart (V4SFmode, vec), elt);
28648 use_vec_extr = TARGET_SSE2;
28651 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28655 use_vec_extr = TARGET_SSE4_1;
28659 /* ??? Could extract the appropriate HImode element and shift.  */
/* Common VEC_SELECT emission for all use_vec_extr paths.  */
28666 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28667 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28669 /* Let the rtl optimizers know about the zero extension performed.  */
28670 if (inner_mode == QImode || inner_mode == HImode)
28672 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28673 target = gen_lowpart (SImode, target);
28676 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to the stack and load the element back.  */
28680 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28682 emit_move_insn (mem, vec);
28684 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28685 emit_move_insn (target, tmp);
28689 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
28690 pattern to reduce; DEST is the destination; IN is the input vector.  */
28693 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28695 rtx tmp1, tmp2, tmp3;
28697 tmp1 = gen_reg_rtx (V4SFmode);
28698 tmp2 = gen_reg_rtx (V4SFmode);
28699 tmp3 = gen_reg_rtx (V4SFmode);
/* First step: combine the high pair with the low pair (movhlps copies
   lanes 2,3 of IN into lanes 0,1 of tmp1).  */
28701 emit_insn (gen_sse_movhlps (tmp1, in, in));
28702 emit_insn (fn (tmp2, tmp1, in));
/* Second step: broadcast lane 1 of the partial result and combine with
   lane 0, leaving the full reduction in DEST's low lane.  */
28704 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28705 GEN_INT (1), GEN_INT (1),
28706 GEN_INT (1+4), GEN_INT (1+4)));
28707 emit_insn (fn (dest, tmp2, tmp3));
28710 /* Target hook for scalar_mode_supported_p.  */
/* Decimal float and TFmode get explicit answers (their return values
   are on elided lines); everything else defers to the default hook.  */
28712 ix86_scalar_mode_supported_p (enum machine_mode mode)
28714 if (DECIMAL_FLOAT_MODE_P (mode))
28716 else if (mode == TFmode)
28719 return default_scalar_mode_supported_p (mode);
28722 /* Implements target hook vector_mode_supported_p.  */
/* A vector mode is supported if any enabled vector ISA (SSE, SSE2,
   AVX-256, MMX, 3DNow!) can hold it in registers.  */
28724 ix86_vector_mode_supported_p (enum machine_mode mode)
28726 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28728 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28730 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28732 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28734 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28739 /* Target hook for c_mode_for_suffix.  */
/* Maps a constant-suffix character to a machine mode; the body (the
   suffix dispatch) is on elided lines.  */
28740 static enum machine_mode
28741 ix86_c_mode_for_suffix (char suffix)
28751 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28753 We do this in the new i386 backend to maintain source compatibility
28754 with the old cc0-based compiler.  */
28757 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28758 tree inputs ATTRIBUTE_UNUSED,
/* Prepend implicit "flags" and "fpsr" clobbers to every asm statement;
   the string lengths (5, 4) exclude the NUL terminator.  */
28761 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28763 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28768 /* Implements target vector targetm.asm.encode_section_info.  This
28769 is not used by netware.  */
28771 static void ATTRIBUTE_UNUSED
28772 ix86_encode_section_info (tree decl, rtx rtl, int first)
28774 default_encode_section_info (decl, rtl, first);
/* Mark static/external variables that live in the large data section so
   addressing code knows they need far (64-bit) addresses.  */
28776 if (TREE_CODE (decl) == VAR_DECL
28777 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28778 && ix86_in_large_data_p (decl))
28779 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28782 /* Worker function for REVERSE_CONDITION.  */
/* FP compares must use the maybe-unordered reversal because NaN makes
   plain negation of the condition incorrect.  */
28785 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28787 return (mode != CCFPmode && mode != CCFPUmode
28788 ? reverse_condition (code)
28789 : reverse_condition_maybe_unordered (code));
28792 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; chooses popping (fstp) vs non-popping
   (fst/fld) forms based on whether the source register dies here.  */
28796 output_387_reg_move (rtx insn, rtx *operands)
28798 if (REG_P (operands[0]))
28800 if (REG_P (operands[1])
28801 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Source dies: pop it.  Popping into %st(0) needs the ffreep idiom.  */
28803 if (REGNO (operands[0]) == FIRST_STACK_REG)
28804 return output_387_ffreep (operands, 0);
28805 return "fstp\t%y0";
28807 if (STACK_TOP_P (operands[0]))
28808 return "fld%Z1\t%y1";
28811 else if (MEM_P (operands[0]))
28813 gcc_assert (REG_P (operands[1]));
28814 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28815 return "fstp%Z0\t%y0";
28818 /* There is no non-popping store to memory for XFmode.
28819 So if we need one, follow the store with a load.  */
28820 if (GET_MODE (operands[0]) == XFmode)
28821 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28823 return "fst%Z0\t%y0";
28830 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28831 FP status register is set.  */
28834 ix86_emit_fp_unordered_jump (rtx label)
28836 rtx reg = gen_reg_rtx (HImode)
/* Read the x87 status word into REG.  */
28839 emit_insn (gen_x86_fnstsw_1 (reg));
28841 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf path: move the status flags into EFLAGS and test UNORDERED.  */
28843 emit_insn (gen_x86_sahf_1 (reg));
28845 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28846 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* No sahf: test the C2 bit (0x04) of the high status byte directly.  */
28850 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28852 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28853 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28856 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28857 gen_rtx_LABEL_REF (VOIDmode, label),
28859 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28861 emit_jump_insn (temp);
/* The unordered case is rare: predict the branch as 10% taken.  */
28862 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28865 /* Output code to perform a log1p XFmode calculation.  */
/* Computes op0 = log1p(op1) on the x87.  fyl2xp1 is only accurate for
   |x| below 1 - sqrt(2)/2 (~0.2929); larger inputs go through
   fyl2x(1 + x) instead.  */
28867 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28869 rtx label1 = gen_label_rtx ();
28870 rtx label2 = gen_label_rtx ();
28872 rtx tmp = gen_reg_rtx (XFmode);
28873 rtx tmp2 = gen_reg_rtx (XFmode);
28875 emit_insn (gen_absxf2 (tmp, op1));
28876 emit_insn (gen_cmpxf (tmp,
28877 CONST_DOUBLE_FROM_REAL_VALUE (
28878 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28880 emit_jump_insn (gen_bge (label1));
/* |op1| small: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
28882 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28883 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28884 emit_jump (label2);
/* |op1| large: op0 = ln2 * log2(1 + op1) via explicit add and fyl2x.  */
28886 emit_label (label1);
28887 emit_move_insn (tmp, CONST1_RTX (XFmode));
28888 emit_insn (gen_addxf3 (tmp, op1, tmp));
28889 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28890 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28892 emit_label (label2);
28895 /* Output code to perform a Newton-Rhapson approximation of a single precision
28896 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
/* Emits res = a / b as a * rcp(b) refined by one Newton-Raphson step.
   MODE may be SFmode or a single-float vector mode (the vector case
   broadcasts the 2.0 constant).  */
28898 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28900 rtx x0, x1, e0, e1, two;
28902 x0 = gen_reg_rtx (mode);
28903 e0 = gen_reg_rtx (mode);
28904 e1 = gen_reg_rtx (mode);
28905 x1 = gen_reg_rtx (mode);
28907 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28909 if (VECTOR_MODE_P (mode))
28910 two = ix86_build_const_vector (SFmode, true, two);
28912 two = force_reg (mode, two);
28914 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28916 /* x0 = rcp(b) estimate */
28917 emit_insn (gen_rtx_SET (VOIDmode, x0,
28918 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28921 emit_insn (gen_rtx_SET (VOIDmode, e0,
28922 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28924 emit_insn (gen_rtx_SET (VOIDmode, e1,
28925 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal) */
28927 emit_insn (gen_rtx_SET (VOIDmode, x1,
28928 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28930 emit_insn (gen_rtx_SET (VOIDmode, res,
28931 gen_rtx_MULT (mode, a, x1)));
28934 /* Output code to perform a Newton-Rhapson approximation of a
28935 single precision floating point [reciprocal] square root.  */
/* Emits res = sqrt(a) or rsqrt(a) (an elided parameter presumably
   selects which — confirm against the full file) from the rsqrtss
   estimate plus one Newton-Raphson step.  */
28937 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28940 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28943 x0 = gen_reg_rtx (mode);
28944 e0 = gen_reg_rtx (mode);
28945 e1 = gen_reg_rtx (mode);
28946 e2 = gen_reg_rtx (mode);
28947 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
28949 real_from_integer (&r, VOIDmode, -3, -1, 0);
28950 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28952 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28953 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28955 if (VECTOR_MODE_P (mode))
28957 mthree = ix86_build_const_vector (SFmode, true, mthree);
28958 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28961 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28962 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28964 /* x0 = rsqrt(a) estimate */
28965 emit_insn (gen_rtx_SET (VOIDmode, x0,
28966 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28969 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
/* Mask x0 so that a == 0 yields x0 == 0 instead of +inf, which would
   produce NaN in the sqrt refinement below.  */
28974 zero = gen_reg_rtx (mode);
28975 mask = gen_reg_rtx (mode);
28977 zero = force_reg (mode, CONST0_RTX(mode));
28978 emit_insn (gen_rtx_SET (VOIDmode, mask,
28979 gen_rtx_NE (mode, zero, a)));
28981 emit_insn (gen_rtx_SET (VOIDmode, x0,
28982 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
28986 emit_insn (gen_rtx_SET (VOIDmode, e0,
28987 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
28989 emit_insn (gen_rtx_SET (VOIDmode, e1,
28990 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0  (added as e1 + (-3.0)) */
28993 mthree = force_reg (mode, mthree);
28994 emit_insn (gen_rtx_SET (VOIDmode, e2,
28995 gen_rtx_PLUS (mode, e1, mthree)));
28997 mhalf = force_reg (mode, mhalf);
28999 /* e3 = -.5 * x0 */
29000 emit_insn (gen_rtx_SET (VOIDmode, e3,
29001 gen_rtx_MULT (mode, x0, mhalf)));
29003 /* e3 = -.5 * e0 */
29004 emit_insn (gen_rtx_SET (VOIDmode, e3,
29005 gen_rtx_MULT (mode, e0, mhalf)));
29006 /* ret = e2 * e3 */
29007 emit_insn (gen_rtx_SET (VOIDmode, res,
29008 gen_rtx_MULT (mode, e2, e3)));
29011 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
29013 static void ATTRIBUTE_UNUSED
29014 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29017 /* With Binutils 2.15, the "@unwind" marker must be specified on
29018 every occurrence of the ".eh_frame" section, not just the first
29021 && strcmp (name, ".eh_frame") == 0)
29023 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29024 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the generic ELF section directive.  */
29027 default_elf_asm_named_section (name, flags, decl);
29030 /* Return the mangling of TYPE if it is an extended fundamental type.  */
29032 static const char *
29033 ix86_mangle_type (const_tree type)
29035 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic types can be extended fundamental types;
   anything else is left to the default mangling (return on elided line).  */
29037 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29038 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29041 switch (TYPE_MODE (type))
29044 /* __float128 is "g".  */
29047 /* "long double" or __float80 is "e".  */
29054 /* For 32-bit code we can save PIC register setup by using
29055 __stack_chk_fail_local hidden function instead of calling
29056 __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
29057 register, so it is better to call __stack_chk_fail directly.  */
29060 ix86_stack_protect_fail (void)
29062 return TARGET_64BIT
29063 ? default_external_stack_protect_fail ()
29064 : default_hidden_stack_protect_fail ();
29067 /* Select a format to encode pointers in exception handling data.  CODE
29068 is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
29069 true if the symbol may be affected by dynamic relocations.
29071 ??? All x86 object file formats are capable of representing this.
29072 After all, the relocation needed is the same as for the call insn.
29073 Whether or not a particular assembler allows us to enter such, I
29074 guess we'll have to see.  */
29076 asm_preferred_eh_data_format (int code, int global)
/* PIC branch (guard elided): pc-relative sdata, 4-byte when the code
   model guarantees the offset fits, 8-byte otherwise; indirect for
   symbols subject to dynamic relocation.  */
29080 int type = DW_EH_PE_sdata8;
29082 || ix86_cmodel == CM_SMALL_PIC
29083 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29084 type = DW_EH_PE_sdata4;
29085 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small/medium code models can use 4-byte absolute data.  */
29087 if (ix86_cmodel == CM_SMALL
29088 || (ix86_cmodel == CM_MEDIUM && code))
29089 return DW_EH_PE_udata4;
29090 return DW_EH_PE_absptr;
29093 /* Expand copysign from SIGN to the positive value ABS_VALUE
29094 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & sign-bit).  MASK arrives (or is built)
   with the sign bit CLEAR, hence the NOT before the AND below.  */
29097 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29099 enum machine_mode mode = GET_MODE (sign);
29100 rtx sgn = gen_reg_rtx (mode);
29101 if (mask == NULL_RTX)
29103 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29104 if (!VECTOR_MODE_P (mode))
29106 /* We need to generate a scalar mode mask in this case.  */
29107 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29108 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29109 mask = gen_reg_rtx (mode);
29110 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* sgn = sign & ~mask  (isolates the sign bit of SIGN).  */
29114 mask = gen_rtx_NOT (mode, mask);
29115 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29116 gen_rtx_AND (mode, mask, sign)));
29117 emit_insn (gen_rtx_SET (VOIDmode, result,
29118 gen_rtx_IOR (mode, abs_value, sgn)));
29121 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
29122 mask for masking out the sign-bit is stored in *SMASK, if that is
29125 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29127 enum machine_mode mode = GET_MODE (op0);
/* MASK has all bits set except the sign bit ("invert" flag true), so
   AND-ing clears the sign: xa = |op0|.  */
29130 xa = gen_reg_rtx (mode);
29131 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29132 if (!VECTOR_MODE_P (mode))
29134 /* We need to generate a scalar mode mask in this case.  */
29135 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29136 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29137 mask = gen_reg_rtx (mode);
29138 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29140 emit_insn (gen_rtx_SET (VOIDmode, xa,
29141 gen_rtx_AND (mode, op0, mask)));
29149 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29150 swapping the operands if SWAP_OPERANDS is true.  The expanded
29151 code is a forward jump to a newly created label in case the
29152 comparison is true.  The generated label rtx is returned.  */
29154 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29155 bool swap_operands)
/* CCFPUmode: unordered FP compare, so NaN operands are handled per
   CODE's unordered semantics.  */
29166 label = gen_label_rtx ();
29167 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29168 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29169 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29170 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29171 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29172 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29173 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29174 JUMP_LABEL (tmp) = label;
29179 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29180 using comparison code CODE. Operands are swapped for the comparison if
29181 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Dispatches to cmpsd (DFmode) or cmpss (SFmode); the result register holds
   an all-ones/all-zeros mask usable with AND to conditionally select a value.
   NOTE(review): elided extract -- operand swap and the return of MASK are
   among the missing lines.  */
29183 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29184 bool swap_operands)
29186 enum machine_mode mode = GET_MODE (op0);
29187 rtx mask = gen_reg_rtx (mode);
29196 if (mode == DFmode)
29197 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29198 gen_rtx_fmt_ee (code, mode, op0, op1)));
29200 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29201 gen_rtx_fmt_ee (code, mode, op0, op1)));
29206 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29207 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: adding then subtracting this constant
   rounds a nonnegative value to integer using the current rounding mode.
   NOTE(review): elided extract -- TWO52 declaration and return missing.  */
29209 ix86_gen_TWO52 (enum machine_mode mode)
29211 REAL_VALUE_TYPE TWO52r;
29214 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29215 TWO52 = const_double_from_real_value (TWO52r, mode);
29216 TWO52 = force_reg (mode, TWO52);
29221 /* Expand SSE sequence for computing lround from OP1 storing
/* round-half-away-from-zero via: op0 = (long) (op1 + copysign (pred_half,
   op1)), where pred_half = nextafter (0.5, 0.0) so that exact .5 inputs
   round correctly after the FP addition.  */
29224 ix86_expand_lround (rtx op0, rtx op1)
29226 /* C code for the stuff we're doing below:
29227 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29230 enum machine_mode mode = GET_MODE (op1);
29231 const struct real_format *fmt;
29232 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29235 /* load nextafter (0.5, 0.0) */
29236 fmt = REAL_MODE_FORMAT (mode);
29237 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29238 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29240 /* adj = copysign (0.5, op1) */
29241 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29242 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29244 /* adj = op1 + adj */
29245 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29247 /* op0 = (imode)adj */
29248 expand_fix (op0, adj, 0);
29251 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* lfloor/lceil: truncate to integer, then subtract (floor) or add (ceil)
   one when the round-trip value overshot the input.  The UNLE jump skips
   the compensation when no adjustment is needed; operand order is flipped
   via !do_floor for the ceil direction.  */
29254 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29256 /* C code for the stuff we're doing below (for do_floor):
29258 xi -= (double)xi > op1 ? 1 : 0;
29261 enum machine_mode fmode = GET_MODE (op1);
29262 enum machine_mode imode = GET_MODE (op0);
29263 rtx ireg, freg, label, tmp;
29265 /* reg = (long)op1 */
29266 ireg = gen_reg_rtx (imode);
29267 expand_fix (ireg, op1, 0);
29269 /* freg = (double)reg */
29270 freg = gen_reg_rtx (fmode);
29271 expand_float (freg, ireg, 0);
29273 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29274 label = ix86_expand_sse_compare_and_jump (UNLE,
29275 freg, op1, !do_floor);
29276 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29277 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29278 emit_move_insn (ireg, tmp);
29280 emit_label (label);
29281 LABEL_NUSES (label) = 1;
29283 emit_move_insn (op0, ireg);
29286 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29287 result in OPERAND0. */
/* Classic TWO52 trick: for |x| < 2**52, (|x| + TWO52) - TWO52 rounds |x|
   to integer in the current rounding mode; the sign is then restored with
   copysign so -0.0 is preserved.  Values with |x| >= TWO52 are already
   integral and bypass the arithmetic via the UNLE jump.  */
29289 ix86_expand_rint (rtx operand0, rtx operand1)
29291 /* C code for the stuff we're doing below:
29292 xa = fabs (operand1);
29293 if (!isless (xa, 2**52))
29295 xa = xa + 2**52 - 2**52;
29296 return copysign (xa, operand1);
29298 enum machine_mode mode = GET_MODE (operand0);
29299 rtx res, xa, label, TWO52, mask;
29301 res = gen_reg_rtx (mode);
29302 emit_move_insn (res, operand1);
29304 /* xa = abs (operand1) */
29305 xa = ix86_expand_sse_fabs (res, &mask);
29307 /* if (!isless (xa, TWO52)) goto label; */
29308 TWO52 = ix86_gen_TWO52 (mode);
29309 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29311 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29312 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29314 ix86_sse_copysign_to_positive (res, xa, res, mask);
29316 emit_label (label);
29317 LABEL_NUSES (label) = 1;
29319 emit_move_insn (operand0, res);
29322 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* floor/ceil without DImode fix trunc (usable on 32-bit targets): round to
   nearest via the TWO52 trick, restore the sign, then subtract 1.0 (floor)
   or -1.0 (ceil, i.e. add 1.0) when the rounded value overshot the input.
   The compensation constant is AND-masked by the UNGT compare result.  */
29325 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29327 /* C code for the stuff we expand below.
29328 double xa = fabs (x), x2;
29329 if (!isless (xa, TWO52))
29331 xa = xa + TWO52 - TWO52;
29332 x2 = copysign (xa, x);
29341 enum machine_mode mode = GET_MODE (operand0);
29342 rtx xa, TWO52, tmp, label, one, res, mask;
29344 TWO52 = ix86_gen_TWO52 (mode);
29346 /* Temporary for holding the result, initialized to the input
29347 operand to ease control flow. */
29348 res = gen_reg_rtx (mode);
29349 emit_move_insn (res, operand1);
29351 /* xa = abs (operand1) */
29352 xa = ix86_expand_sse_fabs (res, &mask);
29354 /* if (!isless (xa, TWO52)) goto label; */
29355 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29357 /* xa = xa + TWO52 - TWO52; */
29358 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29359 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29361 /* xa = copysign (xa, operand1) */
29362 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29364 /* generate 1.0 or -1.0 */
29365 one = force_reg (mode,
29366 const_double_from_real_value (do_floor
29367 ? dconst1 : dconstm1, mode));
29369 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29370 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29371 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29372 gen_rtx_AND (mode, one, tmp)));
29373 /* We always need to subtract here to preserve signed zero. */
29374 tmp = expand_simple_binop (mode, MINUS,
29375 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29376 emit_move_insn (res, tmp);
29378 emit_label (label);
29379 LABEL_NUSES (label) = 1;
29381 emit_move_insn (operand0, res);
29384 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* floor/ceil via truncating fix/float round trip (needs DImode cvttsd2si
   for DFmode, so effectively a 64-bit path), then compensate by +/- 1.0
   when the truncated value overshot.  Sign of zero is restored only when
   HONOR_SIGNED_ZEROS asks for it.  */
29387 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29389 /* C code for the stuff we expand below.
29390 double xa = fabs (x), x2;
29391 if (!isless (xa, TWO52))
29393 x2 = (double)(long)x;
29400 if (HONOR_SIGNED_ZEROS (mode))
29401 return copysign (x2, x);
29404 enum machine_mode mode = GET_MODE (operand0);
29405 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29407 TWO52 = ix86_gen_TWO52 (mode);
29409 /* Temporary for holding the result, initialized to the input
29410 operand to ease control flow. */
29411 res = gen_reg_rtx (mode);
29412 emit_move_insn (res, operand1);
29414 /* xa = abs (operand1) */
29415 xa = ix86_expand_sse_fabs (res, &mask);
29417 /* if (!isless (xa, TWO52)) goto label; */
29418 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29420 /* xa = (double)(long)x */
29421 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29422 expand_fix (xi, res, 0);
29423 expand_float (xa, xi, 0);
29426 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29428 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29429 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29430 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29431 gen_rtx_AND (mode, one, tmp)));
29432 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29433 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29434 emit_move_insn (res, tmp);
29436 if (HONOR_SIGNED_ZEROS (mode))
29437 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29439 emit_label (label);
29440 LABEL_NUSES (label) = 1;
29442 emit_move_insn (operand0, res);
29445 /* Expand SSE sequence for computing round from OPERAND1 storing
29446 into OPERAND0. Sequence that works without relying on DImode truncation
29447 via cvttsd2siq that is only available on 64bit targets. */
/* round-to-nearest-away-from-zero on 32-bit targets: round |x| via the
   TWO52 trick, compute the rounding delta dxa = xa2 - xa, then bias by
   -1/+1 when |delta| exceeded one half, and restore the sign.  The 0.5 /
   1.0 / -0.5 constants are derived arithmetically from dconsthalf.  */
29449 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29451 /* C code for the stuff we expand below.
29452 double xa = fabs (x), xa2, x2;
29453 if (!isless (xa, TWO52))
29455 Using the absolute value and copying back sign makes
29456 -0.0 -> -0.0 correct.
29457 xa2 = xa + TWO52 - TWO52;
29462 else if (dxa > 0.5)
29464 x2 = copysign (xa2, x);
29467 enum machine_mode mode = GET_MODE (operand0);
29468 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29470 TWO52 = ix86_gen_TWO52 (mode);
29472 /* Temporary for holding the result, initialized to the input
29473 operand to ease control flow. */
29474 res = gen_reg_rtx (mode);
29475 emit_move_insn (res, operand1);
29477 /* xa = abs (operand1) */
29478 xa = ix86_expand_sse_fabs (res, &mask);
29480 /* if (!isless (xa, TWO52)) goto label; */
29481 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29483 /* xa2 = xa + TWO52 - TWO52; */
29484 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29485 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29487 /* dxa = xa2 - xa; */
29488 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29490 /* generate 0.5, 1.0 and -0.5 */
29491 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29492 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29493 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): the pseudo created on the next line is dead -- tmp is
   immediately overwritten by the compare-mask result below.  */
29497 tmp = gen_reg_rtx (mode);
29498 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29499 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29500 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29501 gen_rtx_AND (mode, one, tmp)));
29502 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29503 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29504 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29505 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29506 gen_rtx_AND (mode, one, tmp)));
29507 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29509 /* res = copysign (xa2, operand1) */
29510 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29512 emit_label (label);
29513 LABEL_NUSES (label) = 1;
29515 emit_move_insn (operand0, res);
29518 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc via fix/float round trip: (double)(long)x truncates toward zero
   for |x| < TWO52; larger magnitudes are already integral and skip via
   the UNLE jump.  Needs DImode expand_fix for DFmode (64-bit path).  */
29521 ix86_expand_trunc (rtx operand0, rtx operand1)
29523 /* C code for SSE variant we expand below.
29524 double xa = fabs (x), x2;
29525 if (!isless (xa, TWO52))
29527 x2 = (double)(long)x;
29528 if (HONOR_SIGNED_ZEROS (mode))
29529 return copysign (x2, x);
29532 enum machine_mode mode = GET_MODE (operand0);
29533 rtx xa, xi, TWO52, label, res, mask;
29535 TWO52 = ix86_gen_TWO52 (mode);
29537 /* Temporary for holding the result, initialized to the input
29538 operand to ease control flow. */
29539 res = gen_reg_rtx (mode);
29540 emit_move_insn (res, operand1);
29542 /* xa = abs (operand1) */
29543 xa = ix86_expand_sse_fabs (res, &mask);
29545 /* if (!isless (xa, TWO52)) goto label; */
29546 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29548 /* x = (double)(long)x */
29549 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29550 expand_fix (xi, res, 0);
29551 expand_float (res, xi, 0);
29553 if (HONOR_SIGNED_ZEROS (mode))
29554 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29556 emit_label (label);
29557 LABEL_NUSES (label) = 1;
29559 emit_move_insn (operand0, res);
29562 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc without DImode fix trunc (32-bit path): round |x| to integer via
   the TWO52 trick, then subtract 1.0 when rounding went up (res > xa),
   and restore the sign with copysign.  */
29565 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29567 enum machine_mode mode = GET_MODE (operand0);
29568 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29570 /* C code for SSE variant we expand below.
29571 double xa = fabs (x), x2;
29572 if (!isless (xa, TWO52))
29574 xa2 = xa + TWO52 - TWO52;
29578 x2 = copysign (xa2, x);
29582 TWO52 = ix86_gen_TWO52 (mode);
29584 /* Temporary for holding the result, initialized to the input
29585 operand to ease control flow. */
29586 res = gen_reg_rtx (mode);
29587 emit_move_insn (res, operand1);
29589 /* xa = abs (operand1) */
29590 xa = ix86_expand_sse_fabs (res, &smask);
29592 /* if (!isless (xa, TWO52)) goto label; */
29593 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29595 /* res = xa + TWO52 - TWO52; */
29596 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29597 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29598 emit_move_insn (res, tmp);
29601 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29603 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29604 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29605 emit_insn (gen_rtx_SET (VOIDmode, mask,
29606 gen_rtx_AND (mode, mask, one)));
29607 tmp = expand_simple_binop (mode, MINUS,
29608 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29609 emit_move_insn (res, tmp);
29611 /* res = copysign (res, operand1) */
29612 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29614 emit_label (label);
29615 LABEL_NUSES (label) = 1;
29617 emit_move_insn (operand0, res);
29620 /* Expand SSE sequence for computing round from OPERAND1 storing
/* round-half-away-from-zero via truncation: add pred_half =
   nextafter (0.5, 0.0) to |x|, truncate with a fix/float round trip,
   and restore the sign.  Using pred_half instead of exactly 0.5 keeps
   values like 0.49999...97 from rounding up after the FP addition.  */
29623 ix86_expand_round (rtx operand0, rtx operand1)
29625 /* C code for the stuff we're doing below:
29626 double xa = fabs (x);
29627 if (!isless (xa, TWO52))
29629 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29630 return copysign (xa, x);
29632 enum machine_mode mode = GET_MODE (operand0);
29633 rtx res, TWO52, xa, label, xi, half, mask;
29634 const struct real_format *fmt;
29635 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29637 /* Temporary for holding the result, initialized to the input
29638 operand to ease control flow. */
29639 res = gen_reg_rtx (mode);
29640 emit_move_insn (res, operand1);
29642 TWO52 = ix86_gen_TWO52 (mode);
29643 xa = ix86_expand_sse_fabs (res, &mask);
29644 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29646 /* load nextafter (0.5, 0.0) */
29647 fmt = REAL_MODE_FORMAT (mode);
29648 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29649 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29651 /* xa = xa + 0.5 */
29652 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29653 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29655 /* xa = (double)(int64_t)xa */
29656 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29657 expand_fix (xi, xa, 0);
29658 expand_float (xa, xi, 0);
29660 /* res = copysign (xa, operand1) */
29661 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29663 emit_label (label);
29664 LABEL_NUSES (label) = 1;
29666 emit_move_insn (operand0, res);
29670 /* Validate whether a SSE5 instruction is valid or not.
29671 OPERANDS is the array of operands.
29672 NUM is the number of operands.
29673 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29674 NUM_MEMORY is the maximum number of memory operands to accept.
29675 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
/* Builds a bitmask (mem_mask) of which operand positions are memory and
   checks it against the operand-position patterns the SSE5 encodings allow.
   NOTE(review): elided extract -- counter declarations/increments, braces
   and several else/return lines are missing; code left byte-identical.  */
29678 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29679 bool uses_oc0, int num_memory, bool commutative)
29685 /* Count the number of memory arguments */
29688 for (i = 0; i < num; i++)
29690 enum machine_mode mode = GET_MODE (operands[i]);
29691 if (register_operand (operands[i], mode))
29694 else if (memory_operand (operands[i], mode))
29696 mem_mask |= (1 << i);
29702 rtx pattern = PATTERN (insn);
29704 /* allow 0 for pcmov */
29705 if (GET_CODE (pattern) != SET
29706 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29708 || operands[i] != CONST0_RTX (mode))
29713 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29714 a memory operation. */
29715 if (num_memory < 0)
29717 num_memory = -num_memory;
29718 if ((mem_mask & (1 << (num-1))) != 0)
29720 mem_mask &= ~(1 << (num-1));
29725 /* If there were no memory operations, allow the insn */
29729 /* Do not allow the destination register to be a memory operand. */
29730 else if (mem_mask & (1 << 0))
29733 /* If there are too many memory operations, disallow the instruction. While
29734 the hardware only allows 1 memory reference, before register allocation
29735 for some insns, we allow two memory operations sometimes in order to allow
29736 code like the following to be optimized:
29738 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29740 or similar cases that are vectorized into using the fmaddss
29742 else if (mem_count > num_memory)
29745 /* Don't allow more than one memory operation if not optimizing. */
29746 else if (mem_count > 1 && !optimize)
29749 else if (num == 4 && mem_count == 1)
29751 /* formats (destination is the first argument), example fmaddss:
29752 xmm1, xmm1, xmm2, xmm3/mem
29753 xmm1, xmm1, xmm2/mem, xmm3
29754 xmm1, xmm2, xmm3/mem, xmm1
29755 xmm1, xmm2/mem, xmm3, xmm1 */
29757 return ((mem_mask == (1 << 1))
29758 || (mem_mask == (1 << 2))
29759 || (mem_mask == (1 << 3)));
29761 /* format, example pmacsdd:
29762 xmm1, xmm2, xmm3/mem, xmm1 */
29764 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29766 return (mem_mask == (1 << 2));
29769 else if (num == 4 && num_memory == 2)
29771 /* If there are two memory operations, we can load one of the memory ops
29772 into the destination register. This is for optimizing the
29773 multiply/add ops, which the combiner has optimized both the multiply
29774 and the add insns to have a memory operation. We have to be careful
29775 that the destination doesn't overlap with the inputs. */
29776 rtx op0 = operands[0];
29778 if (reg_mentioned_p (op0, operands[1])
29779 || reg_mentioned_p (op0, operands[2])
29780 || reg_mentioned_p (op0, operands[3]))
29783 /* formats (destination is the first argument), example fmaddss:
29784 xmm1, xmm1, xmm2, xmm3/mem
29785 xmm1, xmm1, xmm2/mem, xmm3
29786 xmm1, xmm2, xmm3/mem, xmm1
29787 xmm1, xmm2/mem, xmm3, xmm1
29789 For the oc0 case, we will load either operands[1] or operands[3] into
29790 operands[0], so any combination of 2 memory operands is ok. */
29794 /* format, example pmacsdd:
29795 xmm1, xmm2, xmm3/mem, xmm1
29797 For the integer multiply/add instructions be more restrictive and
29798 require operands[2] and operands[3] to be the memory operands. */
/* BUG(review): the right operand of || below is missing "mem_mask ==";
   ((1 << 2) | (1 << 3)) is a nonzero constant, so the whole expression is
   unconditionally true.  Intended: mem_mask == ((1 << 1) | (1 << 3))
   || mem_mask == ((1 << 2) | (1 << 3)).  */
29800 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29802 return (mem_mask == ((1 << 2) | (1 << 3)));
29805 else if (num == 3 && num_memory == 1)
29807 /* formats, example protb:
29808 xmm1, xmm2, xmm3/mem
29809 xmm1, xmm2/mem, xmm3 */
29811 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29813 /* format, example comeq:
29814 xmm1, xmm2, xmm3/mem */
29816 return (mem_mask == (1 << 2));
29820 gcc_unreachable ();
29826 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29827 hardware will allow by using the destination register to load one of the
29828 memory operations. Presently this is used by the multiply/add routines to
29829 allow 2 memory references. */
/* Preconditions are asserted: op0 must be a non-memory operand not mentioned
   in operands[1..3].  Whichever of operands[1]/operands[3] is a memory
   reference gets loaded into op0 first.
   NOTE(review): elided extract -- the assignments replacing the chosen
   operand with op0 after each emit_move_insn are missing here.  */
29832 ix86_expand_sse5_multiple_memory (rtx operands[],
29834 enum machine_mode mode)
29836 rtx op0 = operands[0];
29838 || memory_operand (op0, mode)
29839 || reg_mentioned_p (op0, operands[1])
29840 || reg_mentioned_p (op0, operands[2])
29841 || reg_mentioned_p (op0, operands[3]))
29842 gcc_unreachable ();
29844 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29845 the destination register. */
29846 if (memory_operand (operands[1], mode))
29848 emit_move_insn (op0, operands[1]);
29851 else if (memory_operand (operands[3], mode))
29853 emit_move_insn (op0, operands[3]);
29857 gcc_unreachable ();
29863 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; columns are documented by the
   header comment below.  NOTE(review): elided extract -- the matching
   #endif lines for the #if/#ifdef entries are missing from this listing.  */
29864 static const struct attribute_spec ix86_attribute_table[] =
29866 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29867 /* Stdcall attribute says callee is responsible for popping arguments
29868 if they are not variable. */
29869 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29870 /* Fastcall attribute says callee is responsible for popping arguments
29871 if they are not variable. */
29872 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29873 /* Cdecl attribute says the callee is a normal C declaration */
29874 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29875 /* Regparm attribute specifies how many integer arguments are to be
29876 passed in registers. */
29877 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29878 /* Sseregparm attribute says we are using x86_64 calling conventions
29879 for FP arguments. */
29880 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29881 /* force_align_arg_pointer says this function realigns the stack at entry. */
29882 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29883 false, true, true, ix86_handle_cconv_attribute },
29884 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29885 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29886 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29887 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29889 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29890 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29891 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29892 SUBTARGET_ATTRIBUTE_TABLE,
29894 /* ms_abi and sysv_abi calling convention function attributes. */
29895 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29896 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29898 { NULL, 0, 0, false, false, false, NULL }
29901 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Returns the negated taken-branch cost as the runtime-test adjustment.
   NOTE(review): elided extract -- the "static int" line, the branch on
   runtime_test, and the non-runtime-test return path are missing.  */
29903 x86_builtin_vectorization_cost (bool runtime_test)
29905 /* If the branch of the runtime test is taken - i.e. - the vectorized
29906 version is skipped - this incurs a misprediction cost (because the
29907 vectorized version is expected to be the fall-through). So we subtract
29908 the latency of a mispredicted branch from the costs that are incured
29909 when the vectorized version is executed.
29911 TODO: The values in individual target tables have to be tuned or new
29912 fields may be needed. For eg. on K8, the default branch path is the
29913 not-taken path. If the taken path is predicted correctly, the minimum
29914 penalty of going down the taken-path is 1 cycle. If the taken-path is
29915 not predicted correctly, then the minimum penalty is 10 cycles. */
29919 return (-(ix86_cost->cond_taken_branch_cost));
29925 /* This function returns the calling abi specific va_list type node.
29926 It returns the FNDECL specific va_list type. */
/* MS-ABI functions get ms_va_list_type_node, everything else the SysV one;
   an early-out (condition elided from this listing) yields the plain
   va_list_type_node before the assert.  */
29929 ix86_fn_abi_va_list (tree fndecl)
29932 return va_list_type_node;
29933 gcc_assert (fndecl != NULL_TREE);
29935 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29936 return ms_va_list_type_node;
29938 return sysv_va_list_type_node;
29941 /* Returns the canonical va_list type specified by TYPE. If there
29942 is no valid TYPE provided, it return NULL_TREE. */
/* Strips one level of reference/pointer, then compares TYPE in turn against
   va_list_type_node, sysv_va_list_type_node and ms_va_list_type_node
   (unwrapping decayed array-to-pointer forms each time), falling back to
   std_canonical_va_list_type.  NOTE(review): the three compare stanzas are
   copy-pasted -- a candidate for a small static helper taking wtype.  */
29945 ix86_canonical_va_list_type (tree type)
29949 /* Resolve references and pointers to va_list type. */
29950 if (INDIRECT_REF_P (type))
29951 type = TREE_TYPE (type);
29952 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29953 type = TREE_TYPE (type);
29957 wtype = va_list_type_node;
29958 gcc_assert (wtype != NULL_TREE);
29960 if (TREE_CODE (wtype) == ARRAY_TYPE)
29962 /* If va_list is an array type, the argument may have decayed
29963 to a pointer type, e.g. by being passed to another function.
29964 In that case, unwrap both types so that we can compare the
29965 underlying records. */
29966 if (TREE_CODE (htype) == ARRAY_TYPE
29967 || POINTER_TYPE_P (htype))
29969 wtype = TREE_TYPE (wtype);
29970 htype = TREE_TYPE (htype);
29973 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29974 return va_list_type_node;
29975 wtype = sysv_va_list_type_node;
29976 gcc_assert (wtype != NULL_TREE);
29978 if (TREE_CODE (wtype) == ARRAY_TYPE)
29980 /* If va_list is an array type, the argument may have decayed
29981 to a pointer type, e.g. by being passed to another function.
29982 In that case, unwrap both types so that we can compare the
29983 underlying records. */
29984 if (TREE_CODE (htype) == ARRAY_TYPE
29985 || POINTER_TYPE_P (htype))
29987 wtype = TREE_TYPE (wtype);
29988 htype = TREE_TYPE (htype);
29991 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29992 return sysv_va_list_type_node;
29993 wtype = ms_va_list_type_node;
29994 gcc_assert (wtype != NULL_TREE);
29996 if (TREE_CODE (wtype) == ARRAY_TYPE)
29998 /* If va_list is an array type, the argument may have decayed
29999 to a pointer type, e.g. by being passed to another function.
30000 In that case, unwrap both types so that we can compare the
30001 underlying records. */
30002 if (TREE_CODE (htype) == ARRAY_TYPE
30003 || POINTER_TYPE_P (htype))
30005 wtype = TREE_TYPE (wtype);
30006 htype = TREE_TYPE (htype);
30009 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30010 return ms_va_list_type_node;
30013 return std_canonical_va_list_type (type);
30016 /* Iterate through the target-specific builtin types for va_list.
30017 IDX denotes the iterator, *PTREE is set to the result type of
30018 the va_list builtin, and *PNAME to its internal type.
30019 Returns zero if there is no element for this index, otherwise
30020 IDX should be increased upon the next call.
30021 Note, do not iterate a base builtin's name like __builtin_va_list.
30022 Used from c_common_nodes_and_builtins. */
/* Two-element iteration: idx 0 -> ms_va_list, idx 1 -> sysv_va_list.
   NOTE(review): elided extract -- the switch/case skeleton and the
   return statements are missing from this listing.  */
30025 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30031 *ptree = ms_va_list_type_node;
30032 *pname = "__builtin_ms_va_list";
30035 *ptree = sysv_va_list_type_node;
30036 *pname = "__builtin_sysv_va_list";
30044 /* Initialize the GCC target structure. */
30045 #undef TARGET_RETURN_IN_MEMORY
30046 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30048 #undef TARGET_LEGITIMIZE_ADDRESS
30049 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30051 #undef TARGET_ATTRIBUTE_TABLE
30052 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30053 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30054 # undef TARGET_MERGE_DECL_ATTRIBUTES
30055 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30058 #undef TARGET_COMP_TYPE_ATTRIBUTES
30059 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30061 #undef TARGET_INIT_BUILTINS
30062 #define TARGET_INIT_BUILTINS ix86_init_builtins
30063 #undef TARGET_EXPAND_BUILTIN
30064 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30066 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30067 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30068 ix86_builtin_vectorized_function
30070 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30071 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30073 #undef TARGET_BUILTIN_RECIPROCAL
30074 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30076 #undef TARGET_ASM_FUNCTION_EPILOGUE
30077 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30079 #undef TARGET_ENCODE_SECTION_INFO
30080 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30081 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30083 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30086 #undef TARGET_ASM_OPEN_PAREN
30087 #define TARGET_ASM_OPEN_PAREN ""
30088 #undef TARGET_ASM_CLOSE_PAREN
30089 #define TARGET_ASM_CLOSE_PAREN ""
30091 #undef TARGET_ASM_ALIGNED_HI_OP
30092 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30093 #undef TARGET_ASM_ALIGNED_SI_OP
30094 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30096 #undef TARGET_ASM_ALIGNED_DI_OP
30097 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30100 #undef TARGET_ASM_UNALIGNED_HI_OP
30101 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30102 #undef TARGET_ASM_UNALIGNED_SI_OP
30103 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30104 #undef TARGET_ASM_UNALIGNED_DI_OP
30105 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30107 #undef TARGET_SCHED_ADJUST_COST
30108 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30109 #undef TARGET_SCHED_ISSUE_RATE
30110 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30111 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30112 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30113 ia32_multipass_dfa_lookahead
30115 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30116 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30119 #undef TARGET_HAVE_TLS
30120 #define TARGET_HAVE_TLS true
30122 #undef TARGET_CANNOT_FORCE_CONST_MEM
30123 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30124 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30125 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30127 #undef TARGET_DELEGITIMIZE_ADDRESS
30128 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30130 #undef TARGET_MS_BITFIELD_LAYOUT_P
30131 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30134 #undef TARGET_BINDS_LOCAL_P
30135 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30137 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30138 #undef TARGET_BINDS_LOCAL_P
30139 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30142 #undef TARGET_ASM_OUTPUT_MI_THUNK
30143 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30144 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30145 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30147 #undef TARGET_ASM_FILE_START
30148 #define TARGET_ASM_FILE_START x86_file_start
30150 #undef TARGET_DEFAULT_TARGET_FLAGS
30151 #define TARGET_DEFAULT_TARGET_FLAGS \
30153 | TARGET_SUBTARGET_DEFAULT \
30154 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30156 #undef TARGET_HANDLE_OPTION
30157 #define TARGET_HANDLE_OPTION ix86_handle_option
30159 #undef TARGET_RTX_COSTS
30160 #define TARGET_RTX_COSTS ix86_rtx_costs
30161 #undef TARGET_ADDRESS_COST
30162 #define TARGET_ADDRESS_COST ix86_address_cost
30164 #undef TARGET_FIXED_CONDITION_CODE_REGS
30165 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30166 #undef TARGET_CC_MODES_COMPATIBLE
30167 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30169 #undef TARGET_MACHINE_DEPENDENT_REORG
30170 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30172 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30173 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30175 #undef TARGET_BUILD_BUILTIN_VA_LIST
30176 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30178 #undef TARGET_FN_ABI_VA_LIST
30179 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30181 #undef TARGET_CANONICAL_VA_LIST_TYPE
30182 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30184 #undef TARGET_EXPAND_BUILTIN_VA_START
30185 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30187 #undef TARGET_MD_ASM_CLOBBERS
30188 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument-passing and calling-convention hooks.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack-realignment / DRAP (dynamic realign argument pointer) support.  */
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

/* Lowering of va_arg expressions at gimplification time.  */
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar and vector machine modes this target supports.  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

/* Machine mode selection for nonstandard C constant suffixes.  */
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Assembler output of DTP-relative (thread-local) DWARF relocations.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel

/* Let subtargets supply their own attribute-insertion hook when they
   define SUBTARGET_INSERT_ATTRIBUTES.
   NOTE(review): the matching #endif for this #ifdef is not visible in
   this view -- confirm it is present in the repository copy.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* C++ name mangling for target-specific types.  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

/* Code emitted when a stack-protector check fails.  */
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Where function return values are placed.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

/* Extra reload handling for register classes that need intermediates.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

/* Cost model consulted by the auto-vectorizer.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

/* Per-function target-option support ("target" attribute/pragma):
   switching, validating, saving, restoring, printing, and deciding
   whether functions with differing options may be inlined.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

/* Hook run just before expanding to RTL (here: possible ABI switch).  */
#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The one global target hook vector.  TARGET_INITIALIZER expands to an
   aggregate initializer built from all the TARGET_* macros defined
   above, so every override in this file takes effect here.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Generated header for this file -- presumably the gengtype-produced
   garbage-collector root tables (NOTE(review): confirm in build dir).  */
#include "gt-i386.h"