1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
/* Forward declarations of file-local helpers (static, so not part of the
   public interface); their definitions are expected later in this file.  */
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Fallback definition when the target configuration does not provide its
   own stack-probe limit.  NOTE(review): the matching #endif was missing in
   this listing; restored so the conditional is balanced.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below carry five entries per operation: QI, HI, SI, DI and
   "other"; index 4 selects the "other" slot.  NOTE(review): the final
   ": 4)" arm was missing from this listing, leaving the ternary chain
   (and the macro) syntactically incomplete; restored.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Byte-count analogue of COSTS_N_INSNS; used by ix86_size_cost below so that
   size tuning measures costs in bytes rather than instruction latencies.  */
75 #define COSTS_N_BYTES(N) ((N) * 2)
/* Stringop-algorithm descriptor that unconditionally selects a library call;
   used as a filler entry in several of the cost tables below.  */
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size (-Os); values are byte counts via
   COSTS_N_BYTES rather than latencies.  NOTE(review): the closing "};" of
   this initializer is not visible in this excerpt — verify on write-back.  */
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
/* Cost table for the Intel 386.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel 486.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium Pro family.  NOTE(review): the closing
   "};" of this initializer is not visible in this excerpt — verify.  */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise. */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Geode.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K6.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Athlon.  NOTE(review): the closing "};" of this
   initializer is not visible in this excerpt — verify on write-back.  */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K8 (Opteron/Athlon 64).  NOTE(review): the closing
   "};" of this initializer is not visible in this excerpt — verify.  */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
time).  */
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
/* Cost table for AMD Family 10h (Barcelona).  Fixes in this listing: the
   MOVD latency notes had lost their opening comment marker (they appeared
   as bare text inside the initializer), and the prefetch comment was left
   unterminated; both are re-wrapped/closed below.  NOTE(review): the
   closing "};" of this initializer is not visible in this excerpt.  */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
/* MOVD transfer latencies (restored comment wrapper; verify against the
   original file):
 777 MOVD reg64, xmmreg Double FSTORE 4
 778 MOVD reg32, xmmreg Double FSTORE 4
 780 MOVD reg64, xmmreg Double FADD 3
 782 MOVD reg32, xmmreg Double FADD 3  */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
time).  */
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium 4.  Fix in this listing: the memset
   stringop table's final "{-1, libcall}}}," entry (original line 878) was
   missing, leaving the aggregate initializer unbalanced; restored —
   verify against the original file.  NOTE(review): the closing "};" of
   this initializer is not visible in this excerpt.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Cost table for Nocona (64-bit Prescott/NetBurst Xeon).  Note the very
   expensive divide (66) and MMX/SSE load costs (12) typical of this core.
   NOTE(review): several lines appear dropped in extraction (orig lines
   913, 939, 950 and the closing "};"), so some fields such as MOVE_RATIO
   and branch cost are absent — verify against upstream i386.c.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
/* NOTE(review): MOVE_RATIO entry (orig line 913) seems dropped here.  */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
/* NOTE(review): branch-cost entry (orig line 939) seems dropped here.  */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* String-op descriptors: memcpy (32-bit, 64-bit) then memset.  */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* NOTE(review): terminator entry (orig line 950) seems dropped here.  */
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Cost table for Core 2.  "COSTS_N_INSNS (1) + 1" for lea slightly
   penalizes lea relative to add without reaching a full extra insn.
   NOTE(review): orig line 986 (likely MOVE_RATIO) and the closing "};"
   appear dropped in extraction — verify against upstream i386.c.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
/* NOTE(review): MOVE_RATIO entry (orig line 986) seems dropped here.  */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* String-op descriptors: memcpy (32-bit, 64-bit) then memset.  */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* Cost table for Atom (in-order Bonnell).  Divide costs scale with
   operand width (18/26/42/74), unlike the flat NetBurst tables above.
   NOTE(review): the closing "};" (orig lines 1110-1111) appears dropped
   in extraction — verify against upstream i386.c.  */
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* String-op descriptors: memcpy (32-bit, 64-bit) then memset.  */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
/* Cost table for the -mtune=generic 64-bit model; numbers are chosen as
   a compromise between the Nocona and K8 tables above.
   NOTE(review): the closing "};" (orig lines 1187-1188) appears dropped
   in extraction — verify against upstream i386.c.  */
1112 /* Generic64 should produce code tuned for Nocona and K8. */
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration lea is 2 cycles and more. With
1117 this cost however our current implementation of synth_mult results in
1118 use of unnecessary temporary registers causing regression on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1164 is increased to perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 32-bit string ops are dummies here: generic64 is 64-bit only.  */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
/* Cost table for the -mtune=generic 32-bit model; only the 32-bit
   string-op descriptors are real, the 64-bit slots are dummies.
   NOTE(review): the closing "};" of this initializer appears dropped in
   extraction — verify against upstream i386.c.  */
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries.  */
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
/* Active cost table; defaults to Pentium and is re-pointed during option
   processing (assignment not visible in this chunk — TODO confirm).  */
1260 const struct processor_costs *ix86_cost = &pentium_cost;
1262 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together to say which tunings apply to which CPUs.  */
1263 #define m_386 (1<<PROCESSOR_I386)
1264 #define m_486 (1<<PROCESSOR_I486)
1265 #define m_PENT (1<<PROCESSOR_PENTIUM)
1266 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1268 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1269 #define m_CORE2 (1<<PROCESSOR_CORE2)
1270 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD and related vendors.  */
1272 #define m_GEODE (1<<PROCESSOR_GEODE)
1273 #define m_K6 (1<<PROCESSOR_K6)
1274 #define m_K6_GEODE (m_K6 | m_GEODE)
1275 #define m_K8 (1<<PROCESSOR_K8)
1276 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1277 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1278 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1279 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
/* The two -mtune=generic variants.  */
1281 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1284 /* Generic instruction choice should be common subset of supported CPUs
1285 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1286 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
/* Each entry is the m_* mask of processors for which the corresponding
   X86_TUNE_* knob is enabled; entries must stay in X86_TUNE_* order.
   NOTE(review): many mask lines appear dropped in extraction (several
   X86_TUNE_* comments below are immediately followed by the next
   comment with no value in between), and the closing "};" is absent.
   Do not trust entry/index alignment here; verify against upstream
   i386.c before editing.  */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
/* NOTE(review): only the CMOVE mask value survives in this view; the
   values for CMPXCHG/CMPXCHG8B/XADD/BSWAP and the closing "};" appear
   dropped in extraction — verify against upstream i386.c.  */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPUs for which -maccumulate-outgoing-args defaults on.
   NOTE(review): the terminating "| m_GENERIC;" line (orig 1561) seems
   dropped here.  */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* CPUs for which 80387 math is always preferred when available.  */
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-op algorithm forced by -mstringop-strategy; no_stringop means
   "pick from the cost tables".  */
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): some rows of this table (and the closing "};") appear
   dropped in extraction — e.g. no row comment precedes the FP-stack and
   MMX groups; verify the entry count equals FIRST_PSEUDO_REGISTER
   against upstream i386.c.  */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers st(0)..st(7).  */
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers xmm0..  */
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers mm0..  */
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8-r15.  */
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
/* Maps GCC register numbers to DBX/stabs numbers for 32-bit targets;
   -1 marks registers with no debug-info number.
   NOTE(review): closing "};" not visible — likely dropped.  */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
/* 64-bit DBX numbering: REX integer registers 8-15 and extended SSE
   registers do get debug numbers here, unlike the 32-bit map.  */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
/* Map a gcc hard-register number to the SVR4 DWARF/DBX register number.
   FP stack registers use the stack-top-relative numbering described in
   the comment above (DWARF 11 == %st(0) == gcc regno 8); -1 means the
   register has no corresponding DWARF number.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
-1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct stack_local_entry GTY(())
1727 unsigned short mode;
1730 struct stack_local_entry *next;
1733 /* Structure describing stack frame layout.
1734 Stack grows downward:
1740 saved frame pointer if frame_pointer_needed
1741 <- HARD_FRAME_POINTER
1750 [va_arg registers] (
1751 > to_allocate <- FRAME_POINTER
1763 HOST_WIDE_INT frame;
1765 int outgoing_arguments_size;
1768 HOST_WIDE_INT to_allocate;
1769 /* The offsets relative to ARG_POINTER. */
1770 HOST_WIDE_INT frame_pointer_offset;
1771 HOST_WIDE_INT hard_frame_pointer_offset;
1772 HOST_WIDE_INT stack_pointer_offset;
1774 /* When save_regs_using_mov is set, emit prologue using
1775 move instead of push instructions. */
1776 bool save_regs_using_mov;
1779 /* Code model option. */
1780 enum cmodel ix86_cmodel;
1782 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1784 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1786 /* Which unit we are generating floating point math for. */
1787 enum fpmath_unit ix86_fpmath;
1789 /* Which cpu are we scheduling for. */
1790 enum attr_cpu ix86_schedule;
1792 /* Which cpu are we optimizing for. */
1793 enum processor_type ix86_tune;
1795 /* Which instruction set architecture to use. */
1796 enum processor_type ix86_arch;
1798 /* true if sse prefetch instruction is not NOOP. */
1799 int x86_prefetch_sse;
1801 /* ix86_regparm_string as a number */
1802 static int ix86_regparm;
1804 /* -mstackrealign option */
1805 extern int ix86_force_align_arg_pointer;
1806 static const char ix86_force_align_arg_pointer_string[]
1807 = "force_align_arg_pointer";
1809 static rtx (*ix86_gen_leave) (void);
1810 static rtx (*ix86_gen_pop1) (rtx);
1811 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1813 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1814 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1815 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1816 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1818 /* Preferred alignment for stack boundary in bits. */
1819 unsigned int ix86_preferred_stack_boundary;
1821 /* Alignment for incoming stack boundary in bits specified at
1823 static unsigned int ix86_user_incoming_stack_boundary;
1825 /* Default alignment for incoming stack boundary in bits. */
1826 static unsigned int ix86_default_incoming_stack_boundary;
1828 /* Alignment for incoming stack boundary in bits. */
1829 unsigned int ix86_incoming_stack_boundary;
1831 /* The abi used by target. */
1832 enum calling_abi ix86_abi;
1834 /* Values 1-5: see jump.c */
1835 int ix86_branch_cost;
1837 /* Calling abi specific va_list type nodes. */
1838 static GTY(()) tree sysv_va_list_type_node;
1839 static GTY(()) tree ms_va_list_type_node;
1841 /* Variables which are this size or smaller are put in the data/bss
1842 or ldata/lbss sections. */
1844 int ix86_section_threshold = 65536;
1846 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1847 char internal_label_prefix[16];
1848 int internal_label_prefix_len;
1850 /* Fence to use after loop using movnt. */
1853 /* Register class used for passing given 64bit part of the argument.
1854 These represent classes as documented by the PS ABI, with the exception
1855 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1856 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1858 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1859 whenever possible (upper half does contain padding). */
1860 enum x86_64_reg_class
1863 X86_64_INTEGER_CLASS,
1864 X86_64_INTEGERSI_CLASS,
1871 X86_64_COMPLEX_X87_CLASS,
1875 #define MAX_CLASSES 4
1877 /* Table of constants used by fldpi, fldln2, etc.... */
1878 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1879 static bool ext_80387_constants_init = 0;
1882 static struct machine_function * ix86_init_machine_status (void);
1883 static rtx ix86_function_value (const_tree, const_tree, bool);
1884 static int ix86_function_regparm (const_tree, const_tree);
1885 static void ix86_compute_frame_layout (struct ix86_frame *);
1886 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1888 static void ix86_add_new_builtins (int);
1890 enum ix86_function_specific_strings
1892 IX86_FUNCTION_SPECIFIC_ARCH,
1893 IX86_FUNCTION_SPECIFIC_TUNE,
1894 IX86_FUNCTION_SPECIFIC_FPMATH,
1895 IX86_FUNCTION_SPECIFIC_MAX
1898 static char *ix86_target_string (int, int, const char *, const char *,
1899 const char *, bool);
1900 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1901 static void ix86_function_specific_save (struct cl_target_option *);
1902 static void ix86_function_specific_restore (struct cl_target_option *);
1903 static void ix86_function_specific_print (FILE *, int,
1904 struct cl_target_option *);
1905 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1906 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1907 static bool ix86_can_inline_p (tree, tree);
1908 static void ix86_set_current_function (tree);
1910 static enum calling_abi ix86_function_abi (const_tree);
1913 /* The svr4 ABI for the i386 says that records and unions are returned
1915 #ifndef DEFAULT_PCC_STRUCT_RETURN
1916 #define DEFAULT_PCC_STRUCT_RETURN 1
1919 /* Whether -mtune= or -march= were specified */
1920 static int ix86_tune_defaulted;
1921 static int ix86_arch_specified;
1923 /* Bit flags that specify the ISA we are compiling for. */
1924 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1926 /* A mask of ix86_isa_flags that includes bit X if X
1927 was set or cleared on the command line. */
1928 static int ix86_isa_flags_explicit;
1930 /* Define a set of ISAs which are available when a given ISA is
1931 enabled. MMX and SSE ISAs are handled separately. */
1933 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1934 #define OPTION_MASK_ISA_3DNOW_SET \
1935 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1937 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1938 #define OPTION_MASK_ISA_SSE2_SET \
1939 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1940 #define OPTION_MASK_ISA_SSE3_SET \
1941 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1942 #define OPTION_MASK_ISA_SSSE3_SET \
1943 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_1_SET \
1945 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1946 #define OPTION_MASK_ISA_SSE4_2_SET \
1947 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1948 #define OPTION_MASK_ISA_AVX_SET \
1949 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1950 #define OPTION_MASK_ISA_FMA_SET \
1951 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1953 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1955 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1957 #define OPTION_MASK_ISA_SSE4A_SET \
1958 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1959 #define OPTION_MASK_ISA_SSE5_SET \
1960 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1962 /* AES and PCLMUL need SSE2 because they use xmm registers */
1963 #define OPTION_MASK_ISA_AES_SET \
1964 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1965 #define OPTION_MASK_ISA_PCLMUL_SET \
1966 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1968 #define OPTION_MASK_ISA_ABM_SET \
1969 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1970 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1971 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1972 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 /* Vectorization library interface and handlers. */
2016 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2017 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2018 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2020 /* Processor target table, indexed by processor number */
2023 const struct processor_costs *cost; /* Processor costs */
2024 const int align_loop; /* Default alignments. */
2025 const int align_loop_max_skip;
2026 const int align_jump;
2027 const int align_jump_max_skip;
2028 const int align_func;
/* NOTE(review): entry columns follow struct ptt above: {cost table,
   align_loop, align_loop_max_skip, align_jump, align_jump_max_skip,
   align_func}.  A 0 presumably means "no preferred alignment" -- confirm
   where these values are consumed in override_options.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{&i386_cost, 4, 3, 4, 3, 4},
{&i486_cost, 16, 15, 16, 15, 16},
{&pentium_cost, 16, 7, 16, 7, 16},
{&pentiumpro_cost, 16, 15, 16, 10, 16},
{&geode_cost, 0, 0, 0, 0, 0},
{&k6_cost, 32, 7, 32, 7, 32},
{&athlon_cost, 16, 7, 16, 7, 16},
{&pentium4_cost, 0, 0, 0, 0, 0},
{&k8_cost, 16, 7, 16, 7, 16},
{&nocona_cost, 0, 0, 0, 0, 0},
{&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
{&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32},
{&atom_cost, 16, 7, 16, 7, 16}
2050 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the enclosing switch and its OPT_m* case labels are not
   visible in this chunk.  Each pair of statements below either enables
   (the *_SET masks, which include all implied ISAs) or disables (the
   *_UNSET masks, which include all dependent ISAs) one -m option, and
   also records the affected bits in ix86_isa_flags_explicit so that
   later option processing knows the user chose them explicitly.  */
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* MMX.  */
ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* 3DNow!.  */
ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* SSE.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* SSE2.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* SSE3.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* SSSE3.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* SSE4.1.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* SSE4.2.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* AVX.  */
ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
/* FMA.  */
ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* SSE4 (alias for SSE4.2 on set, SSE4.1 on unset -- see the macros).  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* SSE4A.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* SSE5.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
/* ABM.  */
ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
/* POPCNT.  */
ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
/* SAHF.  */
ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
/* CX16 (cmpxchg16b).  */
ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
/* AES.  */
ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
/* PCLMUL.  */
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2335 /* Return a string the documents the current -m options. The caller is
2336 responsible for freeing the string. */
2339 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2340 const char *fpmath, bool add_nl_p)
2342 struct ix86_target_opts
2344 const char *option; /* option string */
2345 int mask; /* isa mask options */
2348 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2349 preceding options while match those first. */
2350 static struct ix86_target_opts isa_opts[] =
2352 { "-m64", OPTION_MASK_ISA_64BIT },
2353 { "-msse5", OPTION_MASK_ISA_SSE5 },
2354 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2355 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2356 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2357 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2358 { "-msse3", OPTION_MASK_ISA_SSE3 },
2359 { "-msse2", OPTION_MASK_ISA_SSE2 },
2360 { "-msse", OPTION_MASK_ISA_SSE },
2361 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2362 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2363 { "-mmmx", OPTION_MASK_ISA_MMX },
2364 { "-mabm", OPTION_MASK_ISA_ABM },
2365 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2366 { "-maes", OPTION_MASK_ISA_AES },
2367 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2371 static struct ix86_target_opts flag_opts[] =
2373 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2374 { "-m80387", MASK_80387 },
2375 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2376 { "-malign-double", MASK_ALIGN_DOUBLE },
2377 { "-mcld", MASK_CLD },
2378 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2379 { "-mieee-fp", MASK_IEEE_FP },
2380 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2381 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2382 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2383 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2384 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2385 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2386 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2387 { "-mno-red-zone", MASK_NO_RED_ZONE },
2388 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2389 { "-mrecip", MASK_RECIP },
2390 { "-mrtd", MASK_RTD },
2391 { "-msseregparm", MASK_SSEREGPARM },
2392 { "-mstack-arg-probe", MASK_STACK_PROBE },
2393 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2396 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2399 char target_other[40];
2408 memset (opts, '\0', sizeof (opts));
2410 /* Add -march= option. */
2413 opts[num][0] = "-march=";
2414 opts[num++][1] = arch;
2417 /* Add -mtune= option. */
2420 opts[num][0] = "-mtune=";
2421 opts[num++][1] = tune;
2424 /* Pick out the options in isa options. */
2425 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2427 if ((isa & isa_opts[i].mask) != 0)
2429 opts[num++][0] = isa_opts[i].option;
2430 isa &= ~ isa_opts[i].mask;
2434 if (isa && add_nl_p)
2436 opts[num++][0] = isa_other;
2437 sprintf (isa_other, "(other isa: 0x%x)", isa);
2440 /* Add flag options. */
2441 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2443 if ((flags & flag_opts[i].mask) != 0)
2445 opts[num++][0] = flag_opts[i].option;
2446 flags &= ~ flag_opts[i].mask;
2450 if (flags && add_nl_p)
2452 opts[num++][0] = target_other;
2453 sprintf (target_other, "(other flags: 0x%x)", isa);
2456 /* Add -fpmath= option. */
2459 opts[num][0] = "-mfpmath=";
2460 opts[num++][1] = fpmath;
2467 gcc_assert (num < ARRAY_SIZE (opts));
2469 /* Size the string. */
2471 sep_len = (add_nl_p) ? 3 : 1;
2472 for (i = 0; i < num; i++)
2475 for (j = 0; j < 2; j++)
2477 len += strlen (opts[i][j]);
2480 /* Build the string. */
2481 ret = ptr = (char *) xmalloc (len);
2484 for (i = 0; i < num; i++)
2488 for (j = 0; j < 2; j++)
2489 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2496 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2504 for (j = 0; j < 2; j++)
2507 memcpy (ptr, opts[i][j], len2[j]);
2509 line_len += len2[j];
2514 gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
/* NOTE(review): the return type line, braces, and the if/else that picks
   between the two fprintf calls (plus the free of OPTS) are not visible
   in this chunk.  */
ix86_debug_options (void)
/* Build a heap-allocated summary of the current ISA flags, target flags
   and the -march/-mtune/-mfpmath selections (ix86_target_string documents
   that the caller must free it).  */
char *opts = ix86_target_string (ix86_isa_flags, target_flags,
ix86_arch_string, ix86_tune_string,
ix86_fpmath_string, true);
fprintf (stderr, "%s\n\n", opts);
/* Presumably the branch taken when no option string was built.  */
fprintf (stderr, "<no options>\n\n");
2539 /* Sometimes certain combinations of command options do not make
2540 sense on a particular target machine. You can define a macro
2541 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2542 defined, is executed once just after all the command options have
2545 Don't use this macro to turn on various extra optimizations for
2546 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2549 override_options (bool main_args_p)
2552 unsigned int ix86_arch_mask, ix86_tune_mask;
2557 /* Comes from final.c -- no real reason to change it. */
2558 #define MAX_CODE_ALIGN 16
2566 PTA_PREFETCH_SSE = 1 << 4,
2568 PTA_3DNOW_A = 1 << 6,
2572 PTA_POPCNT = 1 << 10,
2574 PTA_SSE4A = 1 << 12,
2575 PTA_NO_SAHF = 1 << 13,
2576 PTA_SSE4_1 = 1 << 14,
2577 PTA_SSE4_2 = 1 << 15,
2580 PTA_PCLMUL = 1 << 18,
2587 const char *const name; /* processor name or nickname. */
2588 const enum processor_type processor;
2589 const enum attr_cpu schedule;
2590 const unsigned /*enum pta_flags*/ flags;
2592 const processor_alias_table[] =
2594 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2595 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2596 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2598 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2599 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2600 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2602 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2603 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2605 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2606 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2611 PTA_MMX | PTA_SSE | PTA_SSE2},
2612 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2613 PTA_MMX |PTA_SSE | PTA_SSE2},
2614 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2615 PTA_MMX | PTA_SSE | PTA_SSE2},
2616 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2617 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2618 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2619 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2620 | PTA_CX16 | PTA_NO_SAHF},
2621 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2622 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2623 | PTA_SSSE3 | PTA_CX16},
2624 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2625 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2626 | PTA_SSSE3 | PTA_CX16},
2627 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2628 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2629 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2630 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2632 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2633 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2634 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2635 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2636 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2637 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2638 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2639 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2640 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2641 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2642 {"x86-64", PROCESSOR_K8, CPU_K8,
2643 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2644 {"k8", PROCESSOR_K8, CPU_K8,
2645 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2646 | PTA_SSE2 | PTA_NO_SAHF},
2647 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2648 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2649 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2650 {"opteron", PROCESSOR_K8, CPU_K8,
2651 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2652 | PTA_SSE2 | PTA_NO_SAHF},
2653 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2654 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2655 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2656 {"athlon64", PROCESSOR_K8, CPU_K8,
2657 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2658 | PTA_SSE2 | PTA_NO_SAHF},
2659 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2660 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2661 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2662 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2663 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2664 | PTA_SSE2 | PTA_NO_SAHF},
2665 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2666 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2667 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2668 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2669 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2670 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2671 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2672 0 /* flags are only used for -march switch. */ },
2673 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2674 PTA_64BIT /* flags are only used for -march switch. */ },
2677 int const pta_size = ARRAY_SIZE (processor_alias_table);
2679 /* Set up prefix/suffix so the error messages refer to either the command
2680 line argument, or the attribute(target). */
2689 prefix = "option(\"";
2694 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2695 SUBTARGET_OVERRIDE_OPTIONS;
2698 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2699 SUBSUBTARGET_OVERRIDE_OPTIONS;
2702 /* -fPIC is the default for x86_64. */
2703 if (TARGET_MACHO && TARGET_64BIT)
2706 /* Set the default values for switches whose default depends on TARGET_64BIT
2707 in case they weren't overwritten by command line options. */
2710 /* Mach-O doesn't support omitting the frame pointer for now. */
2711 if (flag_omit_frame_pointer == 2)
2712 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2713 if (flag_asynchronous_unwind_tables == 2)
2714 flag_asynchronous_unwind_tables = 1;
2715 if (flag_pcc_struct_return == 2)
2716 flag_pcc_struct_return = 0;
2720 if (flag_omit_frame_pointer == 2)
2721 flag_omit_frame_pointer = 0;
2722 if (flag_asynchronous_unwind_tables == 2)
2723 flag_asynchronous_unwind_tables = 0;
2724 if (flag_pcc_struct_return == 2)
2725 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2728 /* Need to check -mtune=generic first. */
2729 if (ix86_tune_string)
2731 if (!strcmp (ix86_tune_string, "generic")
2732 || !strcmp (ix86_tune_string, "i686")
2733 /* As special support for cross compilers we read -mtune=native
2734 as -mtune=generic. With native compilers we won't see the
2735 -mtune=native, as it was changed by the driver. */
2736 || !strcmp (ix86_tune_string, "native"))
2739 ix86_tune_string = "generic64";
2741 ix86_tune_string = "generic32";
2743 /* If this call is for setting the option attribute, allow the
2744 generic32/generic64 that was previously set. */
2745 else if (!main_args_p
2746 && (!strcmp (ix86_tune_string, "generic32")
2747 || !strcmp (ix86_tune_string, "generic64")))
2749 else if (!strncmp (ix86_tune_string, "generic", 7))
2750 error ("bad value (%s) for %stune=%s %s",
2751 ix86_tune_string, prefix, suffix, sw);
2755 if (ix86_arch_string)
2756 ix86_tune_string = ix86_arch_string;
2757 if (!ix86_tune_string)
2759 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2760 ix86_tune_defaulted = 1;
2763 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2764 need to use a sensible tune option. */
2765 if (!strcmp (ix86_tune_string, "generic")
2766 || !strcmp (ix86_tune_string, "x86-64")
2767 || !strcmp (ix86_tune_string, "i686"))
2770 ix86_tune_string = "generic64";
2772 ix86_tune_string = "generic32";
2775 if (ix86_stringop_string)
2777 if (!strcmp (ix86_stringop_string, "rep_byte"))
2778 stringop_alg = rep_prefix_1_byte;
2779 else if (!strcmp (ix86_stringop_string, "libcall"))
2780 stringop_alg = libcall;
2781 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2782 stringop_alg = rep_prefix_4_byte;
2783 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2785 /* rep; movq isn't available in 32-bit code. */
2786 stringop_alg = rep_prefix_8_byte;
2787 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2788 stringop_alg = loop_1_byte;
2789 else if (!strcmp (ix86_stringop_string, "loop"))
2790 stringop_alg = loop;
2791 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2792 stringop_alg = unrolled_loop;
2794 error ("bad value (%s) for %sstringop-strategy=%s %s",
2795 ix86_stringop_string, prefix, suffix, sw);
2797 if (!strcmp (ix86_tune_string, "x86-64"))
2798 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2799 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2800 prefix, suffix, prefix, suffix, prefix, suffix);
2802 if (!ix86_arch_string)
2803 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2805 ix86_arch_specified = 1;
2807 if (!strcmp (ix86_arch_string, "generic"))
2808 error ("generic CPU can be used only for %stune=%s %s",
2809 prefix, suffix, sw);
2810 if (!strncmp (ix86_arch_string, "generic", 7))
2811 error ("bad value (%s) for %sarch=%s %s",
2812 ix86_arch_string, prefix, suffix, sw);
2814 /* Validate -mabi= value. */
2815 if (ix86_abi_string)
2817 if (strcmp (ix86_abi_string, "sysv") == 0)
2818 ix86_abi = SYSV_ABI;
2819 else if (strcmp (ix86_abi_string, "ms") == 0)
2822 error ("unknown ABI (%s) for %sabi=%s %s",
2823 ix86_abi_string, prefix, suffix, sw);
2826 ix86_abi = DEFAULT_ABI;
2828 if (ix86_cmodel_string != 0)
2830 if (!strcmp (ix86_cmodel_string, "small"))
2831 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2832 else if (!strcmp (ix86_cmodel_string, "medium"))
2833 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2834 else if (!strcmp (ix86_cmodel_string, "large"))
2835 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2837 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2838 else if (!strcmp (ix86_cmodel_string, "32"))
2839 ix86_cmodel = CM_32;
2840 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2841 ix86_cmodel = CM_KERNEL;
2843 error ("bad value (%s) for %scmodel=%s %s",
2844 ix86_cmodel_string, prefix, suffix, sw);
2848 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2849 use of rip-relative addressing. This eliminates fixups that
2850 would otherwise be needed if this object is to be placed in a
2851 DLL, and is essentially just as efficient as direct addressing. */
2852 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2853 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2854 else if (TARGET_64BIT)
2855 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2857 ix86_cmodel = CM_32;
2859 if (ix86_asm_string != 0)
2862 && !strcmp (ix86_asm_string, "intel"))
2863 ix86_asm_dialect = ASM_INTEL;
2864 else if (!strcmp (ix86_asm_string, "att"))
2865 ix86_asm_dialect = ASM_ATT;
2867 error ("bad value (%s) for %sasm=%s %s",
2868 ix86_asm_string, prefix, suffix, sw);
2870 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2871 error ("code model %qs not supported in the %s bit mode",
2872 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2873 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2874 sorry ("%i-bit mode not compiled in",
2875 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2877 for (i = 0; i < pta_size; i++)
2878 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2880 ix86_schedule = processor_alias_table[i].schedule;
2881 ix86_arch = processor_alias_table[i].processor;
2882 /* Default cpu tuning to the architecture. */
2883 ix86_tune = ix86_arch;
2885 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2886 error ("CPU you selected does not support x86-64 "
2889 if (processor_alias_table[i].flags & PTA_MMX
2890 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2891 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2892 if (processor_alias_table[i].flags & PTA_3DNOW
2893 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2894 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2895 if (processor_alias_table[i].flags & PTA_3DNOW_A
2896 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2897 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2898 if (processor_alias_table[i].flags & PTA_SSE
2899 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2900 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2901 if (processor_alias_table[i].flags & PTA_SSE2
2902 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2903 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2904 if (processor_alias_table[i].flags & PTA_SSE3
2905 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2906 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2907 if (processor_alias_table[i].flags & PTA_SSSE3
2908 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2909 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2910 if (processor_alias_table[i].flags & PTA_SSE4_1
2911 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2912 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2913 if (processor_alias_table[i].flags & PTA_SSE4_2
2914 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2915 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2916 if (processor_alias_table[i].flags & PTA_AVX
2917 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2918 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2919 if (processor_alias_table[i].flags & PTA_FMA
2920 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2921 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2922 if (processor_alias_table[i].flags & PTA_SSE4A
2923 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2924 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2925 if (processor_alias_table[i].flags & PTA_SSE5
2926 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2927 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2928 if (processor_alias_table[i].flags & PTA_ABM
2929 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2930 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2931 if (processor_alias_table[i].flags & PTA_CX16
2932 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2933 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2934 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2935 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2936 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2937 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2938 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2939 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2940 if (processor_alias_table[i].flags & PTA_AES
2941 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2942 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2943 if (processor_alias_table[i].flags & PTA_PCLMUL
2944 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2945 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2946 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2947 x86_prefetch_sse = true;
2953 error ("bad value (%s) for %sarch=%s %s",
2954 ix86_arch_string, prefix, suffix, sw);
2956 ix86_arch_mask = 1u << ix86_arch;
2957 for (i = 0; i < X86_ARCH_LAST; ++i)
2958 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2960 for (i = 0; i < pta_size; i++)
2961 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2963 ix86_schedule = processor_alias_table[i].schedule;
2964 ix86_tune = processor_alias_table[i].processor;
2965 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2967 if (ix86_tune_defaulted)
2969 ix86_tune_string = "x86-64";
2970 for (i = 0; i < pta_size; i++)
2971 if (! strcmp (ix86_tune_string,
2972 processor_alias_table[i].name))
2974 ix86_schedule = processor_alias_table[i].schedule;
2975 ix86_tune = processor_alias_table[i].processor;
2978 error ("CPU you selected does not support x86-64 "
2981 /* Intel CPUs have always interpreted SSE prefetch instructions as
2982 NOPs; so, we can enable SSE prefetch instructions even when
2983 -mtune (rather than -march) points us to a processor that has them.
2984 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2985 higher processors. */
2987 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2988 x86_prefetch_sse = true;
2992 error ("bad value (%s) for %stune=%s %s",
2993 ix86_tune_string, prefix, suffix, sw);
2995 ix86_tune_mask = 1u << ix86_tune;
2996 for (i = 0; i < X86_TUNE_LAST; ++i)
2997 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3000 ix86_cost = &ix86_size_cost;
3002 ix86_cost = processor_target_table[ix86_tune].cost;
3004 /* Arrange to set up i386_stack_locals for all functions. */
3005 init_machine_status = ix86_init_machine_status;
3007 /* Validate -mregparm= value. */
3008 if (ix86_regparm_string)
3011 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3012 i = atoi (ix86_regparm_string);
3013 if (i < 0 || i > REGPARM_MAX)
3014 error ("%sregparm=%d%s is not between 0 and %d",
3015 prefix, i, suffix, REGPARM_MAX);
3020 ix86_regparm = REGPARM_MAX;
3022 /* If the user has provided any of the -malign-* options,
3023 warn and use that value only if -falign-* is not set.
3024 Remove this code in GCC 3.2 or later. */
3025 if (ix86_align_loops_string)
3027 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3028 prefix, suffix, suffix);
3029 if (align_loops == 0)
3031 i = atoi (ix86_align_loops_string);
3032 if (i < 0 || i > MAX_CODE_ALIGN)
3033 error ("%salign-loops=%d%s is not between 0 and %d",
3034 prefix, i, suffix, MAX_CODE_ALIGN);
3036 align_loops = 1 << i;
3040 if (ix86_align_jumps_string)
3042 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3043 prefix, suffix, suffix);
3044 if (align_jumps == 0)
3046 i = atoi (ix86_align_jumps_string);
3047 if (i < 0 || i > MAX_CODE_ALIGN)
3048 error ("%salign-loops=%d%s is not between 0 and %d",
3049 prefix, i, suffix, MAX_CODE_ALIGN);
3051 align_jumps = 1 << i;
3055 if (ix86_align_funcs_string)
3057 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3058 prefix, suffix, suffix);
3059 if (align_functions == 0)
3061 i = atoi (ix86_align_funcs_string);
3062 if (i < 0 || i > MAX_CODE_ALIGN)
3063 error ("%salign-loops=%d%s is not between 0 and %d",
3064 prefix, i, suffix, MAX_CODE_ALIGN);
3066 align_functions = 1 << i;
3070 /* Default align_* from the processor table. */
3071 if (align_loops == 0)
3073 align_loops = processor_target_table[ix86_tune].align_loop;
3074 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3076 if (align_jumps == 0)
3078 align_jumps = processor_target_table[ix86_tune].align_jump;
3079 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3081 if (align_functions == 0)
3083 align_functions = processor_target_table[ix86_tune].align_func;
3086 /* Validate -mbranch-cost= value, or provide default. */
3087 ix86_branch_cost = ix86_cost->branch_cost;
3088 if (ix86_branch_cost_string)
3090 i = atoi (ix86_branch_cost_string);
3092 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3094 ix86_branch_cost = i;
3096 if (ix86_section_threshold_string)
3098 i = atoi (ix86_section_threshold_string);
3100 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3102 ix86_section_threshold = i;
3105 if (ix86_tls_dialect_string)
3107 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3108 ix86_tls_dialect = TLS_DIALECT_GNU;
3109 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3110 ix86_tls_dialect = TLS_DIALECT_GNU2;
3111 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3112 ix86_tls_dialect = TLS_DIALECT_SUN;
3114 error ("bad value (%s) for %stls-dialect=%s %s",
3115 ix86_tls_dialect_string, prefix, suffix, sw);
3118 if (ix87_precision_string)
3120 i = atoi (ix87_precision_string);
3121 if (i != 32 && i != 64 && i != 80)
3122 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3127 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3129 /* Enable by default the SSE and MMX builtins. Do allow the user to
3130 explicitly disable any of these. In particular, disabling SSE and
3131 MMX for kernel code is extremely useful. */
3132 if (!ix86_arch_specified)
3134 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3135 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3138 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3142 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3144 if (!ix86_arch_specified)
3146 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3148 /* i386 ABI does not specify red zone. It still makes sense to use it
3149 when programmer takes care to stack from being destroyed. */
3150 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3151 target_flags |= MASK_NO_RED_ZONE;
3154 /* Keep nonleaf frame pointers. */
3155 if (flag_omit_frame_pointer)
3156 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3157 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3158 flag_omit_frame_pointer = 1;
3160 /* If we're doing fast math, we don't care about comparison order
3161 wrt NaNs. This lets us use a shorter comparison sequence. */
3162 if (flag_finite_math_only)
3163 target_flags &= ~MASK_IEEE_FP;
3165 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3166 since the insns won't need emulation. */
3167 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3168 target_flags &= ~MASK_NO_FANCY_MATH_387;
3170 /* Likewise, if the target doesn't have a 387, or we've specified
3171 software floating point, don't use 387 inline intrinsics. */
3173 target_flags |= MASK_NO_FANCY_MATH_387;
3175 /* Turn on MMX builtins for -msse. */
3178 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3179 x86_prefetch_sse = true;
3182 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3183 if (TARGET_SSE4_2 || TARGET_ABM)
3184 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3186 /* Validate -mpreferred-stack-boundary= value or default it to
3187 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3188 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3189 if (ix86_preferred_stack_boundary_string)
3191 i = atoi (ix86_preferred_stack_boundary_string);
3192 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3193 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3194 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3196 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3199 /* Set the default value for -mstackrealign. */
3200 if (ix86_force_align_arg_pointer == -1)
3201 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3203 /* Validate -mincoming-stack-boundary= value or default it to
3204 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3205 if (ix86_force_align_arg_pointer)
3206 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3208 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3209 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3210 if (ix86_incoming_stack_boundary_string)
3212 i = atoi (ix86_incoming_stack_boundary_string);
3213 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3214 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3215 i, TARGET_64BIT ? 4 : 2);
3218 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3219 ix86_incoming_stack_boundary
3220 = ix86_user_incoming_stack_boundary;
3224 /* Accept -msseregparm only if at least SSE support is enabled. */
3225 if (TARGET_SSEREGPARM
3227 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3229 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3230 if (ix86_fpmath_string != 0)
3232 if (! strcmp (ix86_fpmath_string, "387"))
3233 ix86_fpmath = FPMATH_387;
3234 else if (! strcmp (ix86_fpmath_string, "sse"))
3238 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3239 ix86_fpmath = FPMATH_387;
3242 ix86_fpmath = FPMATH_SSE;
3244 else if (! strcmp (ix86_fpmath_string, "387,sse")
3245 || ! strcmp (ix86_fpmath_string, "387+sse")
3246 || ! strcmp (ix86_fpmath_string, "sse,387")
3247 || ! strcmp (ix86_fpmath_string, "sse+387")
3248 || ! strcmp (ix86_fpmath_string, "both"))
3252 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3253 ix86_fpmath = FPMATH_387;
3255 else if (!TARGET_80387)
3257 warning (0, "387 instruction set disabled, using SSE arithmetics");
3258 ix86_fpmath = FPMATH_SSE;
3261 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3264 error ("bad value (%s) for %sfpmath=%s %s",
3265 ix86_fpmath_string, prefix, suffix, sw);
3268 /* If the i387 is disabled, then do not return values in it. */
3270 target_flags &= ~MASK_FLOAT_RETURNS;
3272 /* Use external vectorized library in vectorizing intrinsics. */
3273 if (ix86_veclibabi_string)
3275 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3276 ix86_veclib_handler = ix86_veclibabi_svml;
3277 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3278 ix86_veclib_handler = ix86_veclibabi_acml;
3280 error ("unknown vectorization library ABI type (%s) for "
3281 "%sveclibabi=%s %s", ix86_veclibabi_string,
3282 prefix, suffix, sw);
3285 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3286 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3288 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3290 /* ??? Unwind info is not correct around the CFG unless either a frame
3291 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3292 unwind info generation to be aware of the CFG and propagating states
3294 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3295 || flag_exceptions || flag_non_call_exceptions)
3296 && flag_omit_frame_pointer
3297 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3299 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3300 warning (0, "unwind tables currently require either a frame pointer "
3301 "or %saccumulate-outgoing-args%s for correctness",
3303 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3306 /* If stack probes are required, the space used for large function
3307 arguments on the stack must also be probed, so enable
3308 -maccumulate-outgoing-args so this happens in the prologue. */
3309 if (TARGET_STACK_PROBE
3310 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3312 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3313 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3314 "for correctness", prefix, suffix);
3315 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3318 /* For sane SSE instruction set generation we need fcomi instruction.
3319 It is safe to enable all CMOVE instructions. */
3323 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3326 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3327 p = strchr (internal_label_prefix, 'X');
3328 internal_label_prefix_len = p - internal_label_prefix;
3332 /* When scheduling description is not available, disable scheduler pass
3333 so it won't slow down the compilation and make x87 code slower. */
3334 if (!TARGET_SCHEDULE)
3335 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3337 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3338 set_param_value ("simultaneous-prefetches",
3339 ix86_cost->simultaneous_prefetches);
3340 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3341 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3342 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3343 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3344 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3345 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3347 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3348 can be optimized to ap = __builtin_next_arg (0). */
3350 targetm.expand_builtin_va_start = NULL;
3354 ix86_gen_leave = gen_leave_rex64;
3355 ix86_gen_pop1 = gen_popdi1;
3356 ix86_gen_add3 = gen_adddi3;
3357 ix86_gen_sub3 = gen_subdi3;
3358 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3359 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3360 ix86_gen_monitor = gen_sse3_monitor64;
3361 ix86_gen_andsp = gen_anddi3;
3365 ix86_gen_leave = gen_leave;
3366 ix86_gen_pop1 = gen_popsi1;
3367 ix86_gen_add3 = gen_addsi3;
3368 ix86_gen_sub3 = gen_subsi3;
3369 ix86_gen_sub3_carry = gen_subsi3_carry;
3370 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3371 ix86_gen_monitor = gen_sse3_monitor;
3372 ix86_gen_andsp = gen_andsi3;
3376 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3378 target_flags |= MASK_CLD & ~target_flags_explicit;
3381 /* Save the initial options in case the user does function specific options */
3383 target_option_default_node = target_option_current_node
3384 = build_target_option_node ();
3387 /* Save the current options */
/* Implementation of the TARGET_OPTION_SAVE hook: snapshot the current
   ix86 option globals into *PTR so ix86_function_specific_restore can
   reinstate them later (used by attribute((target(...))) handling).  */
3390 ix86_function_specific_save (struct cl_target_option *ptr)
/* Range-check the values before storing; presumably the corresponding
   cl_target_option members are byte-sized -- confirm against the struct
   definition.  */
3392   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3393   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3394   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3395   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3396   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3398   ptr->arch = ix86_arch;
3399   ptr->schedule = ix86_schedule;
3400   ptr->tune = ix86_tune;
3401   ptr->fpmath = ix86_fpmath;
3402   ptr->branch_cost = ix86_branch_cost;
3403   ptr->tune_defaulted = ix86_tune_defaulted;
3404   ptr->arch_specified = ix86_arch_specified;
3405   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3406   ptr->target_flags_explicit = target_flags_explicit;
3409 /* Restore the current options */
/* Implementation of the TARGET_OPTION_RESTORE hook: copy a previously
   saved snapshot from *PTR back into the ix86 option globals, and
   regenerate the derived per-arch / per-tune feature tables when the
   arch or tune selection actually changed (avoids redundant work on
   the common no-change path).  */
3412 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the old selections so we only rebuild the feature arrays
   when they really change.  */
3414   enum processor_type old_tune = ix86_tune;
3415   enum processor_type old_arch = ix86_arch;
3416   unsigned int ix86_arch_mask, ix86_tune_mask;
3419   ix86_arch = ptr->arch;
3420   ix86_schedule = ptr->schedule;
3421   ix86_tune = ptr->tune;
3422   ix86_fpmath = ptr->fpmath;
3423   ix86_branch_cost = ptr->branch_cost;
3424   ix86_tune_defaulted = ptr->tune_defaulted;
3425   ix86_arch_specified = ptr->arch_specified;
3426   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3427   target_flags_explicit = ptr->target_flags_explicit;
3429   /* Recreate the arch feature tests if the arch changed */
3430   if (old_arch != ix86_arch)
3432       ix86_arch_mask = 1u << ix86_arch;
3433       for (i = 0; i < X86_ARCH_LAST; ++i)
3434 ix86_arch_features[i]
3435 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3438 /* Recreate the tune optimization tests */
3439 if (old_tune != ix86_tune)
3441 ix86_tune_mask = 1u << ix86_tune;
3442 for (i = 0; i < X86_TUNE_LAST; ++i)
3443 ix86_tune_features[i]
3444 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3448 /* Print the current options */
/* Implementation of the TARGET_OPTION_PRINT hook: dump the contents of
   a saved option snapshot *PTR to FILE at the given INDENT, for debug
   output.  Builds a human-readable flag string via ix86_target_string,
   prints arch/tune (with names when in range of the cpu_names table),
   fpmath and branch_cost, then frees the string.  */
3451 ix86_function_specific_print (FILE *file, int indent,
3452 struct cl_target_option *ptr)
3455 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3456 NULL, NULL, NULL, false);
3458 fprintf (file, "%*sarch = %d (%s)\n",
/* Guard the cpu_names[] lookup: only index the table when the saved
   value is below TARGET_CPU_DEFAULT_max.  */
3461 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3462 ? cpu_names[ptr->arch]
3465 fprintf (file, "%*stune = %d (%s)\n",
3468 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3469 ? cpu_names[ptr->tune]
/* fpmath is a bit-set; 387 and SSE may both be enabled.  */
3472 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3473 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3474 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3475 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* ix86_target_string returns an allocated buffer; release it here.  */
3479 fprintf (file, "%*s%s\n", indent, "", target_string);
3480 free (target_string);
3485 /* Inner function to process the attribute((target(...))), take an argument and
3486 set the current options from the argument. If we have a list, recursively go
/* ARGS is either a TREE_LIST (processed recursively) or a STRING_CST
   holding a comma-separated option string such as "sse4.2,no-fused-madd".
   Recognized string-valued options (arch=, tune=, fpmath=) have their
   values stashed into P_STRINGS for the caller.  Returns false on any
   unrecognized or duplicated option -- TODO confirm return convention
   against the elided lines.  */
3490 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Shorthand constructors for entries of the attrs[] option table:
   ISA options route through ix86_handle_option, string options record
   a value, yes/no options set or clear a target_flags MASK.  */
3495 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3496 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3497 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3498 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3513 enum ix86_opt_type type;
3518 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3519 IX86_ATTR_ISA ("abm", OPT_mabm),
3520 IX86_ATTR_ISA ("aes", OPT_maes),
3521 IX86_ATTR_ISA ("avx", OPT_mavx),
3522 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3523 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3524 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3525 IX86_ATTR_ISA ("sse", OPT_msse),
3526 IX86_ATTR_ISA ("sse2", OPT_msse2),
3527 IX86_ATTR_ISA ("sse3", OPT_msse3),
3528 IX86_ATTR_ISA ("sse4", OPT_msse4),
3529 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3530 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3531 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3532 IX86_ATTR_ISA ("sse5", OPT_msse5),
3533 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3535 /* string options */
3536 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3537 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3538 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3541 IX86_ATTR_YES ("cld",
3545 IX86_ATTR_NO ("fancy-math-387",
3546 OPT_mfancy_math_387,
3547 MASK_NO_FANCY_MATH_387),
3549 IX86_ATTR_NO ("fused-madd",
3551 MASK_NO_FUSED_MADD),
3553 IX86_ATTR_YES ("ieee-fp",
3557 IX86_ATTR_YES ("inline-all-stringops",
3558 OPT_minline_all_stringops,
3559 MASK_INLINE_ALL_STRINGOPS),
3561 IX86_ATTR_YES ("inline-stringops-dynamically",
3562 OPT_minline_stringops_dynamically,
3563 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3565 IX86_ATTR_NO ("align-stringops",
3566 OPT_mno_align_stringops,
3567 MASK_NO_ALIGN_STRINGOPS),
3569 IX86_ATTR_YES ("recip",
3575 /* If this is a list, recurse to get the options. */
3576 if (TREE_CODE (args) == TREE_LIST)
3580 for (; args; args = TREE_CHAIN (args))
3581 if (TREE_VALUE (args)
3582 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* Anything other than a TREE_LIST must be a string constant.  */
3588 else if (TREE_CODE (args) != STRING_CST)
3591 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a writable, obstack-allocated copy so we can walk and
   split the attribute string without mutating the original tree.  */
3592 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3594 while (next_optstr && *next_optstr != '\0')
3596 char *p = next_optstr;
3598 char *comma = strchr (next_optstr, ',');
3599 const char *opt_string;
3600 size_t len, opt_len;
3605 enum ix86_opt_type type = ix86_opt_unknown;
/* When a comma was found, the current token ends there and scanning
   resumes after it on the next iteration.  */
3611 len = comma - next_optstr;
3612 next_optstr = comma + 1;
3620 /* Recognize no-xxx. */
3621 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3630 /* Find the option. */
3633 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3635 type = attrs[i].type;
3636 opt_len = attrs[i].len;
/* Cheap first-character check, then a length test: exact length for
   boolean/ISA options, but a strict prefix (len > opt_len) for string
   options like "arch=" whose value follows the prefix.  */
3637 if (ch == attrs[i].string[0]
3638 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3639 && memcmp (p, attrs[i].string, opt_len) == 0)
3642 mask = attrs[i].mask;
3643 opt_string = attrs[i].string;
3648 /* Process the option. */
3651 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options are dispatched through the regular -m option handler so
   implied-option logic stays in one place.  */
3655 else if (type == ix86_opt_isa)
3656 ix86_handle_option (opt, p, opt_set_p);
3658 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* ix86_opt_no entries store the inverse mask, so flip the sense.  */
3660 if (type == ix86_opt_no)
3661 opt_set_p = !opt_set_p;
3664 target_flags |= mask;
3666 target_flags &= ~mask;
3669 else if (type == ix86_opt_str)
3673 error ("option(\"%s\") was already specified", opt_string);
/* Duplicate the value part (past the "xxx=" prefix); the caller is
   expected to free p_strings[] entries -- see
   ix86_valid_target_attribute_tree.  */
3677 p_strings[opt] = xstrdup (p + opt_len);
3687 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse the attribute((target(...))) arguments in ARGS, temporarily
   apply them (rerunning override_options when they differ from the
   defaults), build a target-option node capturing the result, then put
   the global option strings back the way they were.  */
3690 ix86_valid_target_attribute_tree (tree args)
/* Remember the global string options so they can be restored after the
   temporary override below.  */
3692 const char *orig_arch_string = ix86_arch_string;
3693 const char *orig_tune_string = ix86_tune_string;
3694 const char *orig_fpmath_string = ix86_fpmath_string;
3695 int orig_tune_defaulted = ix86_tune_defaulted;
3696 int orig_arch_specified = ix86_arch_specified;
3697 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3700 struct cl_target_option *def
3701 = TREE_TARGET_OPTION (target_option_default_node);
3703 /* Process each of the options on the chain. */
3704 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3707 /* If the changed options are different from the default, rerun override_options,
3708 and then save the options away. The string options are attribute options,
3709 and will be undone when we copy the save structure. */
3710 if (ix86_isa_flags != def->ix86_isa_flags
3711 || target_flags != def->target_flags
3712 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3714 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3716 /* If we are using the default tune= or arch=, undo the string assigned,
3717 and use the default. */
3718 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3719 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3720 else if (!orig_arch_specified)
3721 ix86_arch_string = NULL;
3723 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3724 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3725 else if (orig_tune_defaulted)
3726 ix86_tune_string = NULL;
3728 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3729 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3730 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3731 else if (!TARGET_64BIT && TARGET_SSE)
3732 ix86_fpmath_string = "sse,387";
3734 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3735 override_options (false);
3737 /* Add any builtin functions with the new isa if any. */
3738 ix86_add_new_builtins (ix86_isa_flags);
3740 /* Save the current options unless we are validating options for
3742 t = build_target_option_node ();
/* Put the original option strings back; the temporary values live on
   only inside the saved target-option node.  */
3744 ix86_arch_string = orig_arch_string;
3745 ix86_tune_string = orig_tune_string;
3746 ix86_fpmath_string = orig_fpmath_string;
3748 /* Free up memory allocated to hold the strings */
3749 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3750 if (option_strings[i])
3751 free (option_strings[i]);
3757 /* Hook to validate attribute((target("string"))). */
/* Implementation of the TARGET_OPTION_VALID_ATTRIBUTE_P hook for
   FNDECL.  Temporarily applies the attribute's target (and possibly
   optimization) options, records the resulting nodes on the decl, and
   then restores the previous global state so later parsing is
   unaffected.  NAME and FLAGS are unused.  */
3760 ix86_valid_target_attribute_p (tree fndecl,
3761 tree ARG_UNUSED (name),
3763 int ARG_UNUSED (flags))
3765 struct cl_target_option cur_target;
3767 tree old_optimize = build_optimization_node ();
3768 tree new_target, new_optimize;
3769 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3771 /* If the function changed the optimization levels as well as setting target
3772 options, start with the optimizations specified. */
3773 if (func_optimize && func_optimize != old_optimize)
3774 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3776 /* The target attributes may also change some optimization flags, so update
3777 the optimization options if necessary. */
3778 cl_target_option_save (&cur_target);
3779 new_target = ix86_valid_target_attribute_tree (args);
3780 new_optimize = build_optimization_node ();
/* Attach the computed option nodes to the function declaration.  */
3787 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3789 if (old_optimize != new_optimize)
3790 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made above.  */
3793 cl_target_option_restore (&cur_target);
3795 if (old_optimize != new_optimize)
3796 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3802 /* Hook to determine if one function can safely inline another. */
/* Implementation of the TARGET_CAN_INLINE_P hook: compare the
   target-option attributes of CALLER and CALLEE.  Inlining is rejected
   when the callee requires ISA features the caller lacks, or when the
   non-ISA flags / arch / tune / fpmath / branch-cost settings differ.  */
3805 ix86_can_inline_p (tree caller, tree callee)
3808 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3809 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3811 /* If callee has no option attributes, then it is ok to inline. */
3815 /* If caller has no option attributes, but callee does then it is not ok to
3817 else if (!caller_tree)
3822 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3823 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3825 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3826 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: AND-ing the caller's flags with the callee's must leave
   the callee's flags unchanged.  */
3828 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3829 != callee_opts->ix86_isa_flags)
3832 /* See if we have the same non-isa options. */
3833 else if (caller_opts->target_flags != callee_opts->target_flags)
3836 /* See if arch, tune, etc. are the same. */
3837 else if (caller_opts->arch != callee_opts->arch)
3840 else if (caller_opts->tune != callee_opts->tune)
3843 else if (caller_opts->fpmath != callee_opts->fpmath)
3846 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3857 /* Remember the last target of ix86_set_current_function. */
/* GC-rooted cache of the previous FNDECL, used to skip redundant
   option restores.  */
3858 static GTY(()) tree ix86_previous_fndecl;
3860 /* Establish appropriate back-end context for processing the function
3861 FNDECL. The argument might be NULL to indicate processing at top
3862 level, outside of any function scope. */
/* Implementation of the TARGET_SET_CURRENT_FUNCTION hook.  */
3864 ix86_set_current_function (tree fndecl)
3866 /* Only change the context if the function changes. This hook is called
3867 several times in the course of compiling a function, and we don't want to
3868 slow things down too much or call target_reinit when it isn't safe. */
3869 if (fndecl && fndecl != ix86_previous_fndecl)
3871 tree old_tree = (ix86_previous_fndecl
3872 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3875 tree new_tree = (fndecl
3876 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3879 ix86_previous_fndecl = fndecl;
/* Same target options as last time: nothing to restore.  */
3880 if (old_tree == new_tree)
3885 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current global option set.  */
3891 struct cl_target_option *def
3892 = TREE_TARGET_OPTION (target_option_current_node);
3894 cl_target_option_restore (def);
3901 /* Return true if this goes in large data/bss. */
/* Decide whether declaration EXP belongs in the x86-64 medium-model
   large data/bss sections (.ldata/.lbss): only relevant for the
   medium code models, never for functions, honoring an explicit
   .ldata/.lbss section attribute, and otherwise based on the object's
   size versus ix86_section_threshold.  */
3904 ix86_in_large_data_p (tree exp)
/* Large sections only exist in the medium code models.  */
3906 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3909 /* Functions are never large data. */
3910 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces large.  */
3913 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3915 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3916 if (strcmp (section, ".ldata") == 0
3917 || strcmp (section, ".lbss") == 0)
3923 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3925 /* If this is an incomplete type with size 0, then we can't put it
3926 in data because it might be too big when completed. */
3927 if (!size || size > ix86_section_threshold)
3934 /* Switch to the appropriate section for output of DECL.
3935 DECL is either a `VAR_DECL' node or a constant of some sort.
3936 RELOC indicates whether forming the initial value of DECL requires
3937 link-time relocations. */
/* Forward declaration so the definition below can serve as the
   TARGET_ASM_SELECT_SECTION hook.  */
3939 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* For medium-model large data, map the decl's section category onto the
   corresponding ".l"-prefixed large section; everything else defers to
   the generic ELF selection.  */
3943 x86_64_elf_select_section (tree decl, int reloc,
3944 unsigned HOST_WIDE_INT align)
3946 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3947 && ix86_in_large_data_p (decl))
3949 const char *sname = NULL;
3950 unsigned int flags = SECTION_WRITE;
3951 switch (categorize_decl_for_section (decl, reloc))
3956 case SECCAT_DATA_REL:
3957 sname = ".ldata.rel";
3959 case SECCAT_DATA_REL_LOCAL:
3960 sname = ".ldata.rel.local";
3962 case SECCAT_DATA_REL_RO:
3963 sname = ".ldata.rel.ro";
3965 case SECCAT_DATA_REL_RO_LOCAL:
3966 sname = ".ldata.rel.ro.local";
3970 flags |= SECTION_BSS;
3973 case SECCAT_RODATA_MERGE_STR:
3974 case SECCAT_RODATA_MERGE_STR_INIT:
3975 case SECCAT_RODATA_MERGE_CONST:
3979 case SECCAT_SRODATA:
3986 /* We don't split these for medium model. Place them into
3987 default sections and hope for best. */
3989 case SECCAT_EMUTLS_VAR:
3990 case SECCAT_EMUTLS_TMPL:
3995 /* We might get called with string constants, but get_named_section
3996 doesn't like them as they are not DECLs. Also, we need to set
3997 flags in that case. */
3999 return get_section (sname, flags, NULL);
4000 return get_named_section (decl, sname, reloc);
4003 return default_elf_select_section (decl, reloc, align);
4006 /* Build up a unique section name, expressed as a
4007 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4008 RELOC indicates whether the initial value of EXP requires
4009 link-time relocations. */
/* Large-data counterpart of default_unique_section: choose a ".l"
   section prefix per category (short prefixes like ".ld"/".lb" for
   one-only copies), optionally prepend ".gnu.linkonce" when COMDAT
   groups are unavailable, and record the result on the decl.  */
4011 static void ATTRIBUTE_UNUSED
4012 x86_64_elf_unique_section (tree decl, int reloc)
4014 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4015 && ix86_in_large_data_p (decl))
4017 const char *prefix = NULL;
4018 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4019 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4021 switch (categorize_decl_for_section (decl, reloc))
4024 case SECCAT_DATA_REL:
4025 case SECCAT_DATA_REL_LOCAL:
4026 case SECCAT_DATA_REL_RO:
4027 case SECCAT_DATA_REL_RO_LOCAL:
4028 prefix = one_only ? ".ld" : ".ldata";
4031 prefix = one_only ? ".lb" : ".lbss";
4034 case SECCAT_RODATA_MERGE_STR:
4035 case SECCAT_RODATA_MERGE_STR_INIT:
4036 case SECCAT_RODATA_MERGE_CONST:
4037 prefix = one_only ? ".lr" : ".lrodata";
4039 case SECCAT_SRODATA:
4046 /* We don't split these for medium model. Place them into
4047 default sections and hope for best. */
4049 case SECCAT_EMUTLS_VAR:
4050 prefix = targetm.emutls.var_section;
4052 case SECCAT_EMUTLS_TMPL:
4053 prefix = targetm.emutls.tmpl_section;
4058 const char *name, *linkonce;
/* Section names use the decl's assembler name with any target
   encoding stripped.  */
4061 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4062 name = targetm.strip_name_encoding (name);
4064 /* If we're using one_only, then there needs to be a .gnu.linkonce
4065 prefix to the section name. */
4066 linkonce = one_only ? ".gnu.linkonce" : "";
4068 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4070 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4074 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Medium-model objects above the section threshold need .largecomm so
     the linker keeps them out of the small-data area.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fprintf (file, ".largecomm\t");
  else
    fprintf (file, "%s", COMMON_ASM_OP);
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4099 /* Utility function for targets to use in implementing
4100 ASM_OUTPUT_ALIGNED_BSS. */
4103 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4104 const char *name, unsigned HOST_WIDE_INT size,
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 switch_to_section (get_named_section (decl, ".lbss", 0));
4111 switch_to_section (bss_section);
4112 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4113 #ifdef ASM_DECLARE_OBJECT_NAME
4114 last_assemble_variable_decl = decl;
4115 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4117 /* Standard thing is just output label for the object. */
4118 ASM_OUTPUT_LABEL (file, name);
4119 #endif /* ASM_DECLARE_OBJECT_NAME */
4120 ASM_OUTPUT_SKIP (file, size ? size : 1);
4124 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4126 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4127 make the problem with not enough registers even worse. */
4128 #ifdef INSN_SCHEDULING
4130 flag_schedule_insns = 0;
4134 /* The Darwin libraries never set errno, so we might as well
4135 avoid calling them when that's the only reason we would. */
4136 flag_errno_math = 0;
4138 /* The default values of these switches depend on the TARGET_64BIT
4139 that is not known at this moment. Mark these values with 2 and
4140 let user the to override these. In case there is no command line option
4141 specifying them, we will set the defaults in override_options. */
4143 flag_omit_frame_pointer = 2;
4144 flag_pcc_struct_return = 2;
4145 flag_asynchronous_unwind_tables = 2;
4146 flag_vect_cost_model = 1;
4147 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4148 SUBTARGET_OPTIMIZATION_OPTIONS;
4152 /* Decide whether we can make a sibling call to a function. DECL is the
4153 declaration of the function being targeted by the call and EXP is the
4154 CALL_EXPR representing the call. */
4157 ix86_function_ok_for_sibcall (tree decl, tree exp)
4162 /* If we are generating position-independent code, we cannot sibcall
4163 optimize any indirect call, or a direct call to a global function,
4164 as the PLT requires %ebx be live. */
4165 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4172 func = TREE_TYPE (CALL_EXPR_FN (exp));
4173 if (POINTER_TYPE_P (func))
4174 func = TREE_TYPE (func);
4177 /* Check that the return value locations are the same. Like
4178 if we are returning floats on the 80387 register stack, we cannot
4179 make a sibcall from a function that doesn't return a float to a
4180 function that does or, conversely, from a function that does return
4181 a float to a function that doesn't; the necessary stack adjustment
4182 would not be executed. This is also the place we notice
4183 differences in the return value ABI. Note that it is ok for one
4184 of the functions to have void return type as long as the return
4185 value of the other is passed in a register. */
4186 a = ix86_function_value (TREE_TYPE (exp), func, false);
4187 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4189 if (STACK_REG_P (a) || STACK_REG_P (b))
4191 if (!rtx_equal_p (a, b))
4194 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4196 else if (!rtx_equal_p (a, b))
4199 /* If this call is indirect, we'll need to be able to use a call-clobbered
4200 register for the address of the target function. Make sure that all
4201 such registers are not used for passing parameters. */
4202 if (!decl && !TARGET_64BIT)
4206 /* We're looking at the CALL_EXPR, we need the type of the function. */
4207 type = CALL_EXPR_FN (exp); /* pointer expression */
4208 type = TREE_TYPE (type); /* pointer type */
4209 type = TREE_TYPE (type); /* function type */
4211 if (ix86_function_regparm (type, NULL) >= 3)
4213 /* ??? Need to count the actual number of registers to be used,
4214 not the possible number of registers. Fix later. */
4219 /* Dllimport'd functions are also called indirectly. */
4220 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4222 && decl && DECL_DLLIMPORT_P (decl)
4223 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4226 /* If we need to align the outgoing stack, then sibcalling would
4227 unalign the stack, which may break the called function. */
4228 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4231 /* Otherwise okay. That also includes certain types of indirect calls. */
4235 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4236 calling convention attributes;
4237 arguments as in struct attribute_spec.handler. */
4240 ix86_handle_cconv_attribute (tree *node, tree name,
4242 int flags ATTRIBUTE_UNUSED,
4245 if (TREE_CODE (*node) != FUNCTION_TYPE
4246 && TREE_CODE (*node) != METHOD_TYPE
4247 && TREE_CODE (*node) != FIELD_DECL
4248 && TREE_CODE (*node) != TYPE_DECL)
4250 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4251 IDENTIFIER_POINTER (name));
4252 *no_add_attrs = true;
4256 /* Can combine regparm with all attributes but fastcall. */
4257 if (is_attribute_p ("regparm", name))
4261 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4263 error ("fastcall and regparm attributes are not compatible");
4266 cst = TREE_VALUE (args);
4267 if (TREE_CODE (cst) != INTEGER_CST)
4269 warning (OPT_Wattributes,
4270 "%qs attribute requires an integer constant argument",
4271 IDENTIFIER_POINTER (name));
4272 *no_add_attrs = true;
4274 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4276 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4277 IDENTIFIER_POINTER (name), REGPARM_MAX);
4278 *no_add_attrs = true;
4286 /* Do not warn when emulating the MS ABI. */
4287 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4288 warning (OPT_Wattributes, "%qs attribute ignored",
4289 IDENTIFIER_POINTER (name));
4290 *no_add_attrs = true;
4294 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4295 if (is_attribute_p ("fastcall", name))
4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4299 error ("fastcall and cdecl attributes are not compatible");
4301 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4303 error ("fastcall and stdcall attributes are not compatible");
4305 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4307 error ("fastcall and regparm attributes are not compatible");
4311 /* Can combine stdcall with fastcall (redundant), regparm and
4313 else if (is_attribute_p ("stdcall", name))
4315 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4317 error ("stdcall and cdecl attributes are not compatible");
4319 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4321 error ("stdcall and fastcall attributes are not compatible");
4325 /* Can combine cdecl with regparm and sseregparm. */
4326 else if (is_attribute_p ("cdecl", name))
4328 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4330 error ("stdcall and cdecl attributes are not compatible");
4332 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4334 error ("fastcall and cdecl attributes are not compatible");
4338 /* Can combine sseregparm with all attributes. */
4343 /* Return 0 if the attributes for two types are incompatible, 1 if they
4344 are compatible, and 2 if they are nearly compatible (which causes a
4345 warning to be generated). */
4348 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4350 /* Check for mismatch of non-default calling convention. */
4351 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4353 if (TREE_CODE (type1) != FUNCTION_TYPE
4354 && TREE_CODE (type1) != METHOD_TYPE)
4357 /* Check for mismatched fastcall/regparm types. */
4358 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4359 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4360 || (ix86_function_regparm (type1, NULL)
4361 != ix86_function_regparm (type2, NULL)))
4364 /* Check for mismatched sseregparm types. */
4365 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4366 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4369 /* Check for mismatched return types (cdecl vs stdcall). */
4370 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4371 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4377 /* Return the regparm value for a function with the indicated TYPE and DECL.
4378 DECL may be NULL when calling function indirectly
4379 or considering a libcall. */
4382 ix86_function_regparm (const_tree type, const_tree decl)
4387 static bool error_issued;
4390 return (ix86_function_type_abi (type) == SYSV_ABI
4391 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4393 regparm = ix86_regparm;
4394 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4398 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4400 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4402 /* We can't use regparm(3) for nested functions because
4403 these pass static chain pointer in %ecx register. */
4404 if (!error_issued && regparm == 3
4405 && decl_function_context (decl)
4406 && !DECL_NO_STATIC_CHAIN (decl))
4408 error ("nested functions are limited to 2 register parameters");
4409 error_issued = true;
4417 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4420 /* Use register calling convention for local functions when possible. */
4422 && TREE_CODE (decl) == FUNCTION_DECL
4426 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4427 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4430 int local_regparm, globals = 0, regno;
4433 /* Make sure no regparm register is taken by a
4434 fixed register variable. */
4435 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4436 if (fixed_regs[local_regparm])
4439 /* We can't use regparm(3) for nested functions as these use
4440 static chain pointer in third argument. */
4441 if (local_regparm == 3
4442 && decl_function_context (decl)
4443 && !DECL_NO_STATIC_CHAIN (decl))
4446 /* If the function realigns its stackpointer, the prologue will
4447 clobber %ecx. If we've already generated code for the callee,
4448 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4449 scanning the attributes for the self-realigning property. */
4450 f = DECL_STRUCT_FUNCTION (decl);
4451 /* Since current internal arg pointer won't conflict with
4452 parameter passing regs, so no need to change stack
4453 realignment and adjust regparm number.
4455 Each fixed register usage increases register pressure,
4456 so less registers should be used for argument passing.
4457 This functionality can be overriden by an explicit
4459 for (regno = 0; regno <= DI_REG; regno++)
4460 if (fixed_regs[regno])
4464 = globals < local_regparm ? local_regparm - globals : 0;
4466 if (local_regparm > regparm)
4467 regparm = local_regparm;
4474 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4475 DFmode (2) arguments in SSE registers for a function with the
4476 indicated TYPE and DECL. DECL may be NULL when calling function
4477 indirectly or considering a libcall. Otherwise return 0. */
4480 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4482 gcc_assert (!TARGET_64BIT);
4484 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4485 by the sseregparm attribute. */
4486 if (TARGET_SSEREGPARM
4487 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4494 error ("Calling %qD with attribute sseregparm without "
4495 "SSE/SSE2 enabled", decl);
4497 error ("Calling %qT with attribute sseregparm without "
4498 "SSE/SSE2 enabled", type);
4506 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4507 (and DFmode for SSE2) arguments in SSE registers. */
4508 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4510 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4511 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4513 return TARGET_SSE2 ? 2 : 1;
4519 /* Return true if EAX is live at the start of the function. Used by
4520 ix86_expand_prologue to determine if we need special help before
4521 calling allocate_stack_worker. */
4524 ix86_eax_live_at_start_p (void)
4526 /* Cheat. Don't bother working forward from ix86_function_regparm
4527 to the function type to whether an actual argument is located in
4528 eax. Instead just look at cfg info, which is still close enough
4529 to correct at this point. This gives false positives for broken
4530 functions that might use uninitialized data that happens to be
4531 allocated in eax, but who cares? */
4532 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4535 /* Value is the number of bytes of arguments automatically
4536 popped when returning from a subroutine call.
4537 FUNDECL is the declaration node of the function (as a tree),
4538 FUNTYPE is the data type of the function (as a tree),
4539 or for a library call it is an identifier node for the subroutine name.
4540 SIZE is the number of bytes of arguments passed on the stack.
4542 On the 80386, the RTD insn may be used to pop them if the number
4543 of args is fixed, but if the number is variable then the caller
4544 must pop them all. RTD can't be used for library calls now
4545 because the library is compiled with the Unix compiler.
4546 Use of RTD is a selectable option, since it is incompatible with
4547 standard Unix calling sequences. If the option is not selected,
4548 the caller must always pop the args.
4550 The attribute stdcall is equivalent to RTD on a per module basis. */
4553 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4557 /* None of the 64-bit ABIs pop arguments. */
4561 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4563 /* Cdecl functions override -mrtd, and never pop the stack. */
4564 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4566 /* Stdcall and fastcall functions will pop the stack if not
4568 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4569 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4572 if (rtd && ! stdarg_p (funtype))
4576 /* Lose any fake structure return argument if it is passed on the stack. */
4577 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4578 && !KEEP_AGGREGATE_RETURN_POINTER)
4580 int nregs = ix86_function_regparm (funtype, fundecl);
4582 return GET_MODE_SIZE (Pmode);
4588 /* Argument support functions. */
4590 /* Return true when register may be used to pass function parameters. */
4592 ix86_function_arg_regno_p (int regno)
4595 const int *parm_regs;
4600 return (regno < REGPARM_MAX
4601 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4603 return (regno < REGPARM_MAX
4604 || (TARGET_MMX && MMX_REGNO_P (regno)
4605 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4606 || (TARGET_SSE && SSE_REGNO_P (regno)
4607 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4612 if (SSE_REGNO_P (regno) && TARGET_SSE)
4617 if (TARGET_SSE && SSE_REGNO_P (regno)
4618 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4622 /* TODO: The function should depend on current function ABI but
4623 builtins.c would need updating then. Therefore we use the
4626 /* RAX is used as hidden argument to va_arg functions. */
4627 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4630 if (ix86_abi == MS_ABI)
4631 parm_regs = x86_64_ms_abi_int_parameter_registers;
4633 parm_regs = x86_64_int_parameter_registers;
4634 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4635 : X86_64_REGPARM_MAX); i++)
4636 if (regno == parm_regs[i])
4641 /* Return if we do not know how to pass TYPE solely in registers. */
4644 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4646 if (must_pass_in_stack_var_size_or_pad (mode, type))
4649 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4650 The layout_type routine is crafty and tries to trick us into passing
4651 currently unsupported vector types on the stack by using TImode. */
4652 return (!TARGET_64BIT && mode == TImode
4653 && type && TREE_CODE (type) != VECTOR_TYPE);
4656 /* It returns the size, in bytes, of the area reserved for arguments passed
4657 in registers for the function represented by fndecl dependent to the used
4660 ix86_reg_parm_stack_space (const_tree fndecl)
4662 enum calling_abi call_abi = SYSV_ABI;
4663 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4664 call_abi = ix86_function_abi (fndecl);
4666 call_abi = ix86_function_type_abi (fndecl);
4667 if (call_abi == MS_ABI)
4672 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4675 ix86_function_type_abi (const_tree fntype)
4677 if (TARGET_64BIT && fntype != NULL)
4679 enum calling_abi abi = ix86_abi;
4680 if (abi == SYSV_ABI)
4682 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4685 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4692 static enum calling_abi
4693 ix86_function_abi (const_tree fndecl)
4697 return ix86_function_type_abi (TREE_TYPE (fndecl));
4700 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4703 ix86_cfun_abi (void)
4705 if (! cfun || ! TARGET_64BIT)
4707 return cfun->machine->call_abi;
4711 extern void init_regs (void);
4713 /* Implementation of call abi switching target hook. Specific to FNDECL
4714 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4715 for more details. */
4717 ix86_call_abi_override (const_tree fndecl)
4719 if (fndecl == NULL_TREE)
4720 cfun->machine->call_abi = ix86_abi;
4722 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4725 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4726 re-initialization of init_regs each time we switch function context since
4727 this is needed only during RTL expansion. */
4729 ix86_maybe_switch_abi (void)
4732 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4736 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4737 for a call to a function whose data type is FNTYPE.
4738 For a library call, FNTYPE is 0. */
4741 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4742 tree fntype, /* tree ptr for function decl */
4743 rtx libname, /* SYMBOL_REF of library name or 0 */
4746 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4747 memset (cum, 0, sizeof (*cum));
4750 cum->call_abi = ix86_function_abi (fndecl);
4752 cum->call_abi = ix86_function_type_abi (fntype);
4753 /* Set up the number of registers to use for passing arguments. */
4755 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4756 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4757 cum->nregs = ix86_regparm;
4760 if (cum->call_abi != ix86_abi)
4761 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4766 cum->sse_nregs = SSE_REGPARM_MAX;
4769 if (cum->call_abi != ix86_abi)
4770 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4771 : X64_SSE_REGPARM_MAX;
4775 cum->mmx_nregs = MMX_REGPARM_MAX;
4776 cum->warn_avx = true;
4777 cum->warn_sse = true;
4778 cum->warn_mmx = true;
4780 /* Because type might mismatch in between caller and callee, we need to
4781 use actual type of function for local calls.
4782 FIXME: cgraph_analyze can be told to actually record if function uses
4783 va_start so for local functions maybe_vaarg can be made aggressive
4785 FIXME: once typesytem is fixed, we won't need this code anymore. */
4787 fntype = TREE_TYPE (fndecl);
4788 cum->maybe_vaarg = (fntype
4789 ? (!prototype_p (fntype) || stdarg_p (fntype))
4794 /* If there are variable arguments, then we won't pass anything
4795 in registers in 32-bit mode. */
4796 if (stdarg_p (fntype))
4807 /* Use ecx and edx registers if function has fastcall attribute,
4808 else look for regparm information. */
4811 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4817 cum->nregs = ix86_function_regparm (fntype, fndecl);
4820 /* Set up the number of SSE registers used for passing SFmode
4821 and DFmode arguments. Warn for mismatching ABI. */
4822 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4826 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4827 But in the case of vector types, it is some vector mode.
4829 When we have only some of our vector isa extensions enabled, then there
4830 are some modes for which vector_mode_supported_p is false. For these
4831 modes, the generic vector support in gcc will choose some non-vector mode
4832 in order to implement the type. By computing the natural mode, we'll
4833 select the proper ABI location for the operand and not depend on whatever
4834 the middle-end decides to do with these vector types.
4836 The midde-end can't deal with the vector types > 16 bytes. In this
4837 case, we return the original mode and warn ABI change if CUM isn't
4840 static enum machine_mode
4841 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4843 enum machine_mode mode = TYPE_MODE (type);
4845 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4847 HOST_WIDE_INT size = int_size_in_bytes (type);
4848 if ((size == 8 || size == 16 || size == 32)
4849 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4850 && TYPE_VECTOR_SUBPARTS (type) > 1)
4852 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4854 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4855 mode = MIN_MODE_VECTOR_FLOAT;
4857 mode = MIN_MODE_VECTOR_INT;
4859 /* Get the mode which has this inner mode and number of units. */
4860 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4861 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4862 && GET_MODE_INNER (mode) == innermode)
4864 if (size == 32 && !TARGET_AVX)
4866 static bool warnedavx;
4873 warning (0, "AVX vector argument without AVX "
4874 "enabled changes the ABI");
4876 return TYPE_MODE (type);
4889 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4890 this may not agree with the mode that the type system has chosen for the
4891 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4892 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4895 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4900 if (orig_mode != BLKmode)
4901 tmp = gen_rtx_REG (orig_mode, regno);
4904 tmp = gen_rtx_REG (mode, regno);
4905 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4906 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4912 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4913 of this code is to classify each 8bytes of incoming argument by the register
4914 class and assign registers accordingly. */
4916 /* Return the union class of CLASS1 and CLASS2.
4917 See the x86-64 PS ABI for details. */
4919 static enum x86_64_reg_class
4920 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4922 /* Rule #1: If both classes are equal, this is the resulting class. */
4923 if (class1 == class2)
4926 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4928 if (class1 == X86_64_NO_CLASS)
4930 if (class2 == X86_64_NO_CLASS)
4933 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4934 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4935 return X86_64_MEMORY_CLASS;
4937 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4938 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4939 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4940 return X86_64_INTEGERSI_CLASS;
4941 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4942 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4943 return X86_64_INTEGER_CLASS;
4945 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4947 if (class1 == X86_64_X87_CLASS
4948 || class1 == X86_64_X87UP_CLASS
4949 || class1 == X86_64_COMPLEX_X87_CLASS
4950 || class2 == X86_64_X87_CLASS
4951 || class2 == X86_64_X87UP_CLASS
4952 || class2 == X86_64_COMPLEX_X87_CLASS)
4953 return X86_64_MEMORY_CLASS;
4955 /* Rule #6: Otherwise class SSE is used. */
4956 return X86_64_SSE_CLASS;
4959 /* Classify the argument of type TYPE and mode MODE.
4960 CLASSES will be filled by the register class used to pass each word
4961 of the operand. The number of words is returned. In case the parameter
4962 should be passed in memory, 0 is returned. As a special case for zero
4963 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4965 BIT_OFFSET is used internally for handling records and specifies offset
4966 of the offset in bits modulo 256 to avoid overflow cases.
4968 See the x86-64 PS ABI for details.
4972 classify_argument (enum machine_mode mode, const_tree type,
4973 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4975 HOST_WIDE_INT bytes =
4976 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4977 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4979 /* Variable sized entities are always passed/returned in memory. */
4983 if (mode != VOIDmode
4984 && targetm.calls.must_pass_in_stack (mode, type))
4987 if (type && AGGREGATE_TYPE_P (type))
4991 enum x86_64_reg_class subclasses[MAX_CLASSES];
4993 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4997 for (i = 0; i < words; i++)
4998 classes[i] = X86_64_NO_CLASS;
5000 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5001 signalize memory class, so handle it as special case. */
5004 classes[0] = X86_64_NO_CLASS;
5008 /* Classify each field of record and merge classes. */
5009 switch (TREE_CODE (type))
5012 /* And now merge the fields of structure. */
5013 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5015 if (TREE_CODE (field) == FIELD_DECL)
5019 if (TREE_TYPE (field) == error_mark_node)
5022 /* Bitfields are always classified as integer. Handle them
5023 early, since later code would consider them to be
5024 misaligned integers. */
5025 if (DECL_BIT_FIELD (field))
5027 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5028 i < ((int_bit_position (field) + (bit_offset % 64))
5029 + tree_low_cst (DECL_SIZE (field), 0)
5032 merge_classes (X86_64_INTEGER_CLASS,
5039 type = TREE_TYPE (field);
5041 /* Flexible array member is ignored. */
5042 if (TYPE_MODE (type) == BLKmode
5043 && TREE_CODE (type) == ARRAY_TYPE
5044 && TYPE_SIZE (type) == NULL_TREE
5045 && TYPE_DOMAIN (type) != NULL_TREE
5046 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5051 if (!warned && warn_psabi)
5054 inform (input_location,
5055 "The ABI of passing struct with"
5056 " a flexible array member has"
5057 " changed in GCC 4.4");
5061 num = classify_argument (TYPE_MODE (type), type,
5063 (int_bit_position (field)
5064 + bit_offset) % 256);
5067 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5068 for (i = 0; i < num && (i + pos) < words; i++)
5070 merge_classes (subclasses[i], classes[i + pos]);
5077 /* Arrays are handled as small records. */
5080 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5081 TREE_TYPE (type), subclasses, bit_offset);
5085 /* The partial classes are now full classes. */
5086 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5087 subclasses[0] = X86_64_SSE_CLASS;
5088 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5089 && !((bit_offset % 64) == 0 && bytes == 4))
5090 subclasses[0] = X86_64_INTEGER_CLASS;
5092 for (i = 0; i < words; i++)
5093 classes[i] = subclasses[i % num];
5098 case QUAL_UNION_TYPE:
5099 /* Unions are similar to RECORD_TYPE but offset is always 0.
5101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5103 if (TREE_CODE (field) == FIELD_DECL)
5107 if (TREE_TYPE (field) == error_mark_node)
5110 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5111 TREE_TYPE (field), subclasses,
5115 for (i = 0; i < num; i++)
5116 classes[i] = merge_classes (subclasses[i], classes[i]);
5127 /* When size > 16 bytes, if the first one isn't
5128 X86_64_SSE_CLASS or any other ones aren't
5129 X86_64_SSEUP_CLASS, everything should be passed in
5131 if (classes[0] != X86_64_SSE_CLASS)
5134 for (i = 1; i < words; i++)
5135 if (classes[i] != X86_64_SSEUP_CLASS)
5139 /* Final merger cleanup. */
5140 for (i = 0; i < words; i++)
5142 /* If one class is MEMORY, everything should be passed in
5144 if (classes[i] == X86_64_MEMORY_CLASS)
5147 /* The X86_64_SSEUP_CLASS should be always preceded by
5148 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5149 if (classes[i] == X86_64_SSEUP_CLASS
5150 && classes[i - 1] != X86_64_SSE_CLASS
5151 && classes[i - 1] != X86_64_SSEUP_CLASS)
5153 /* The first one should never be X86_64_SSEUP_CLASS. */
5154 gcc_assert (i != 0);
5155 classes[i] = X86_64_SSE_CLASS;
5158 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5159 everything should be passed in memory. */
5160 if (classes[i] == X86_64_X87UP_CLASS
5161 && (classes[i - 1] != X86_64_X87_CLASS))
5165 /* The first one should never be X86_64_X87UP_CLASS. */
5166 gcc_assert (i != 0);
5167 if (!warned && warn_psabi)
5170 inform (input_location,
5171 "The ABI of passing union with long double"
5172 " has changed in GCC 4.4");
5180 /* Compute alignment needed. We align all types to natural boundaries with
5181 exception of XFmode that is aligned to 64bits. */
5182 if (mode != VOIDmode && mode != BLKmode)
5184 int mode_alignment = GET_MODE_BITSIZE (mode);
5187 mode_alignment = 128;
5188 else if (mode == XCmode)
5189 mode_alignment = 256;
5190 if (COMPLEX_MODE_P (mode))
5191 mode_alignment /= 2;
5192 /* Misaligned fields are always returned in memory. */
5193 if (bit_offset % mode_alignment)
5197 /* for V1xx modes, just use the base mode */
5198 if (VECTOR_MODE_P (mode) && mode != V1DImode
5199 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5200 mode = GET_MODE_INNER (mode);
5202 /* Classification of atomic types. */
5207 classes[0] = X86_64_SSE_CLASS;
5210 classes[0] = X86_64_SSE_CLASS;
5211 classes[1] = X86_64_SSEUP_CLASS;
5221 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5225 classes[0] = X86_64_INTEGERSI_CLASS;
5228 else if (size <= 64)
5230 classes[0] = X86_64_INTEGER_CLASS;
5233 else if (size <= 64+32)
5235 classes[0] = X86_64_INTEGER_CLASS;
5236 classes[1] = X86_64_INTEGERSI_CLASS;
5239 else if (size <= 64+64)
5241 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5249 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5253 /* OImode shouldn't be used directly. */
5258 if (!(bit_offset % 64))
5259 classes[0] = X86_64_SSESF_CLASS;
5261 classes[0] = X86_64_SSE_CLASS;
5264 classes[0] = X86_64_SSEDF_CLASS;
5267 classes[0] = X86_64_X87_CLASS;
5268 classes[1] = X86_64_X87UP_CLASS;
5271 classes[0] = X86_64_SSE_CLASS;
5272 classes[1] = X86_64_SSEUP_CLASS;
5275 classes[0] = X86_64_SSE_CLASS;
5276 if (!(bit_offset % 64))
5282 if (!warned && warn_psabi)
5285 inform (input_location,
5286 "The ABI of passing structure with complex float"
5287 " member has changed in GCC 4.4");
5289 classes[1] = X86_64_SSESF_CLASS;
5293 classes[0] = X86_64_SSEDF_CLASS;
5294 classes[1] = X86_64_SSEDF_CLASS;
5297 classes[0] = X86_64_COMPLEX_X87_CLASS;
5300 /* This mode is larger than 16 bytes. */
5308 classes[0] = X86_64_SSE_CLASS;
5309 classes[1] = X86_64_SSEUP_CLASS;
5310 classes[2] = X86_64_SSEUP_CLASS;
5311 classes[3] = X86_64_SSEUP_CLASS;
5319 classes[0] = X86_64_SSE_CLASS;
5320 classes[1] = X86_64_SSEUP_CLASS;
5327 classes[0] = X86_64_SSE_CLASS;
5333 gcc_assert (VECTOR_MODE_P (mode));
5338 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5340 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5341 classes[0] = X86_64_INTEGERSI_CLASS;
5343 classes[0] = X86_64_INTEGER_CLASS;
5344 classes[1] = X86_64_INTEGER_CLASS;
5345 return 1 + (bytes > 8);
5349 /* Examine the argument and return set number of register required in each
5350 class. Return 0 iff parameter should be passed in memory. */
5352 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5353 int *int_nregs, int *sse_nregs)
5355 enum x86_64_reg_class regclass[MAX_CLASSES];
5356 int n = classify_argument (mode, type, regclass, 0);
/* Walk the per-eightbyte classification, tallying how many general
   (*int_nregs) and SSE (*sse_nregs) registers the value consumes.  */
5362 for (n--; n >= 0; n--)
5363 switch (regclass[n])
5365 case X86_64_INTEGER_CLASS:
5366 case X86_64_INTEGERSI_CLASS:
5369 case X86_64_SSE_CLASS:
5370 case X86_64_SSESF_CLASS:
5371 case X86_64_SSEDF_CLASS:
5374 case X86_64_NO_CLASS:
5375 case X86_64_SSEUP_CLASS:
5377 case X86_64_X87_CLASS:
5378 case X86_64_X87UP_CLASS:
/* x87 classes are only usable for return values; as an argument the
   value has to be passed in memory (the in_return test below).  */
5382 case X86_64_COMPLEX_X87_CLASS:
5383 return in_return ? 2 : 0;
5384 case X86_64_MEMORY_CLASS:
5390 /* Construct container for the argument used by GCC interface. See
5391 FUNCTION_ARG for the detailed description. */
5394 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5395 const_tree type, int in_return, int nintregs, int nsseregs,
5396 const int *intreg, int sse_regno)
5398 /* The following variables hold the static issued_error state. */
5399 static bool issued_sse_arg_error;
5400 static bool issued_sse_ret_error;
5401 static bool issued_x87_ret_error;
5403 enum machine_mode tmpmode;
5405 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5406 enum x86_64_reg_class regclass[MAX_CLASSES];
5410 int needed_sseregs, needed_intregs;
5411 rtx exp[MAX_CLASSES];
5414 n = classify_argument (mode, type, regclass, 0);
/* Bail out (pass in memory) when classification fails or when the value
   needs more registers than the caller has left.  */
5417 if (!examine_argument (mode, type, in_return, &needed_intregs,
5420 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5423 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5424 some less clueful developer tries to use floating-point anyway. */
5425 if (needed_sseregs && !TARGET_SSE)
/* Diagnose only once per direction (return vs. argument); the static
   issued_* flags above keep the error from repeating.  */
5429 if (!issued_sse_ret_error)
5431 error ("SSE register return with SSE disabled")；
5432 issued_sse_ret_error = true;
5435 else if (!issued_sse_arg_error)
5437 error ("SSE register argument with SSE disabled");
5438 issued_sse_arg_error = true;
5443 /* Likewise, error if the ABI requires us to return values in the
5444 x87 registers and the user specified -mno-80387. */
5445 if (!TARGET_80387 && in_return)
5446 for (i = 0; i < n; i++)
5447 if (regclass[i] == X86_64_X87_CLASS
5448 || regclass[i] == X86_64_X87UP_CLASS
5449 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5451 if (!issued_x87_ret_error)
5453 error ("x87 register return with x87 disabled");
5454 issued_x87_ret_error = true;
5459 /* First construct simple cases. Avoid SCmode, since we want to use
5460 single register to pass this type. */
5461 if (n == 1 && mode != SCmode)
5462 switch (regclass[0])
5464 case X86_64_INTEGER_CLASS:
5465 case X86_64_INTEGERSI_CLASS:
5466 return gen_rtx_REG (mode, intreg[0]);
5467 case X86_64_SSE_CLASS:
5468 case X86_64_SSESF_CLASS:
5469 case X86_64_SSEDF_CLASS:
5470 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5471 case X86_64_X87_CLASS:
5472 case X86_64_COMPLEX_X87_CLASS:
5473 return gen_rtx_REG (mode, FIRST_STACK_REG);
5474 case X86_64_NO_CLASS:
5475 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE+SSEUP pairs occupy a single XMM register.  */
5480 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5481 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5482 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5484 && regclass[0] == X86_64_SSE_CLASS
5485 && regclass[1] == X86_64_SSEUP_CLASS
5486 && regclass[2] == X86_64_SSEUP_CLASS
5487 && regclass[3] == X86_64_SSEUP_CLASS
5489 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5492 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5493 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* CDImode/TImode/TFmode spanning two consecutive integer registers can
   be expressed as a single register pair starting at intreg[0].  */
5494 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5495 && regclass[1] == X86_64_INTEGER_CLASS
5496 && (mode == CDImode || mode == TImode || mode == TFmode)
5497 && intreg[0] + 1 == intreg[1])
5498 return gen_rtx_REG (mode, intreg[0]);
5500 /* Otherwise figure out the entries of the PARALLEL. */
5501 for (i = 0; i < n; i++)
5505 switch (regclass[i])
5507 case X86_64_NO_CLASS:
5509 case X86_64_INTEGER_CLASS:
5510 case X86_64_INTEGERSI_CLASS:
5511 /* Merge TImodes on aligned occasions here too. */
5512 if (i * 8 + 8 > bytes)
5513 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5514 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5518 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5519 if (tmpmode == BLKmode)
5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5522 gen_rtx_REG (tmpmode, *intreg),
5526 case X86_64_SSESF_CLASS:
5527 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5528 gen_rtx_REG (SFmode,
5529 SSE_REGNO (sse_regno)),
5533 case X86_64_SSEDF_CLASS:
5534 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5535 gen_rtx_REG (DFmode,
5536 SSE_REGNO (sse_regno)),
5540 case X86_64_SSE_CLASS:
5548 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5558 && regclass[1] == X86_64_SSEUP_CLASS
5559 && regclass[2] == X86_64_SSEUP_CLASS
5560 && regclass[3] == X86_64_SSEUP_CLASS);
5567 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5568 gen_rtx_REG (tmpmode,
5569 SSE_REGNO (sse_regno)),
5578 /* Empty aligned struct, union or class. */
/* Wrap the collected EXPR_LISTs into the PARALLEL that describes the
   multi-register container.  */
5582 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5583 for (i = 0; i < nexps; i++)
5584 XVECEXP (ret, 0, i) = exp [i];
5588 /* Update the data in CUM to advance over an argument of mode MODE
5589 and data type TYPE. (TYPE is null for libcalls where that information
5590 may not be available.) */
5593 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5594 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* General-register bookkeeping: consume WORDS registers.  */
5610 cum->words += words;
5611 cum->nregs -= words;
5612 cum->regno += words;
5614 if (cum->nregs <= 0)
5622 /* OImode shouldn't be used directly. */
5626 if (cum->float_in_sse < 2)
5629 if (cum->float_in_sse < 1)
/* SSE-register bookkeeping — only scalar (non-aggregate) values advance
   the SSE counters here.  */
5646 if (!type || !AGGREGATE_TYPE_P (type))
5648 cum->sse_words += words;
5649 cum->sse_nregs -= 1;
5650 cum->sse_regno += 1;
5651 if (cum->sse_nregs <= 0)
/* MMX-register bookkeeping, same non-aggregate restriction.  */
5664 if (!type || !AGGREGATE_TYPE_P (type))
5666 cum->mmx_words += words;
5667 cum->mmx_nregs -= 1;
5668 cum->mmx_regno += 1;
5669 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument under the x86-64 SysV calling
   convention; falls back to stack words when the argument does not fit
   in the remaining registers.  */
5680 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5681 tree type, HOST_WIDE_INT words, int named)
5683 int int_nregs, sse_nregs;
5685 /* Unnamed 256bit vector mode parameters are passed on stack. */
5686 if (!named && VALID_AVX256_REG_MODE (mode))
5689 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5690 cum->words += words;
5691 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5693 cum->nregs -= int_nregs;
5694 cum->sse_nregs -= sse_nregs;
5695 cum->regno += int_nregs;
5696 cum->sse_regno += sse_nregs;
5699 cum->words += words;
/* Advance CUM over one argument under the Microsoft x64 convention.
   Arguments larger than 8 bytes are not passed by value here.  */
5703 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5704 HOST_WIDE_INT words)
5706 /* Otherwise, this should be passed indirect. */
5707 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5709 cum->words += words;
/* Top-level argument-advance dispatcher: compute BYTES/WORDS for the
   argument, then route to the MS-x64, SysV-x64 or 32-bit helper
   depending on target and effective call ABI.  */
5718 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5719 tree type, int named)
5721 HOST_WIDE_INT bytes, words;
5723 if (mode == BLKmode)
5724 bytes = int_size_in_bytes (type);
5726 bytes = GET_MODE_SIZE (mode);
5727 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5730 mode = type_natural_mode (type, NULL);
/* CUM may be null (e.g. for indirect calls); fall back to ix86_abi.  */
5732 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5733 function_arg_advance_ms_64 (cum, bytes, words);
5734 else if (TARGET_64BIT)
5735 function_arg_advance_64 (cum, mode, type, words, named);
5737 function_arg_advance_32 (cum, mode, type, bytes, words);
5740 /* Define where to put the arguments to a function.
5741 Value is zero to push the argument on the stack,
5742 or a hard register in which to store the argument.
5744 MODE is the argument's machine mode.
5745 TYPE is the data type of the argument (as a tree).
5746 This is null for libcalls where that information may
5748 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5749 the preceding args and about the function being called.
5750 NAMED is nonzero if this argument is a named parameter
5751 (otherwise it is an extra parameter matching an ellipsis). */
5754 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5755 enum machine_mode orig_mode, tree type,
5756 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Warn at most once each about SSE/MMX arguments with the unit
   disabled (see the warning calls below).  */
5758 static bool warnedsse, warnedmmx;
5760 /* Avoid the AL settings for the Unix64 ABI. */
5761 if (mode == VOIDmode)
5777 if (words <= cum->nregs)
5779 int regno = cum->regno;
5781 /* Fastcall allocates the first two DWORD (SImode) or
5782 smaller arguments to ECX and EDX if it isn't an
5788 || (type && AGGREGATE_TYPE_P (type)))
5791 /* ECX not EAX is the first allocated register. */
5792 if (regno == AX_REG)
5795 return gen_rtx_REG (mode, regno);
5800 if (cum->float_in_sse < 2)
5803 if (cum->float_in_sse < 1)
5807 /* In 32bit, we pass TImode in xmm registers. */
5814 if (!type || !AGGREGATE_TYPE_P (type))
5816 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5819 warning (0, "SSE vector argument without SSE enabled "
5823 return gen_reg_or_parallel (mode, orig_mode,
5824 cum->sse_regno + FIRST_SSE_REG);
5829 /* OImode shouldn't be used directly. */
5838 if (!type || !AGGREGATE_TYPE_P (type))
5841 return gen_reg_or_parallel (mode, orig_mode,
5842 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector arguments go in MMX registers (non-aggregates only).  */
5851 if (!type || !AGGREGATE_TYPE_P (type))
5853 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5856 warning (0, "MMX vector argument without MMX enabled "
5860 return gen_reg_or_parallel (mode, orig_mode,
5861 cum->mmx_regno + FIRST_MMX_REG);
/* Give the register (or NULL for stack) for one argument under the
   x86-64 SysV convention; delegates layout to construct_container.  */
5870 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5871 enum machine_mode orig_mode, tree type, int named)
5873 /* Handle a hidden AL argument containing number of registers
5874 for varargs x86-64 functions. */
5875 if (mode == VOIDmode)
5876 return GEN_INT (cum->maybe_vaarg
5877 ? (cum->sse_nregs < 0
5878 ? (cum->call_abi == ix86_abi
5880 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5881 : X64_SSE_REGPARM_MAX))
5896 /* Unnamed 256bit vector mode parameters are passed on stack. */
5902 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5904 &x86_64_int_parameter_registers [cum->regno],
/* Give the register (or NULL for stack) for one argument under the
   Microsoft x64 convention.  */
5909 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5910 enum machine_mode orig_mode, int named,
5911 HOST_WIDE_INT bytes)
5915 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5916 We use value of -2 to specify that current function call is MSABI. */
5917 if (mode == VOIDmode)
5918 return GEN_INT (-2);
5920 /* If we've run out of registers, it goes on the stack. */
5921 if (cum->nregs == 0)
5924 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5926 /* Only floating point modes are passed in anything but integer regs. */
5927 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5930 regno = cum->regno + FIRST_SSE_REG;
5935 /* Unnamed floating parameters are passed in both the
5936 SSE and integer registers. */
5937 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5938 t2 = gen_rtx_REG (mode, regno);
5939 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5940 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5941 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5944 /* Handle aggregated types passed in register. */
5945 if (orig_mode == BLKmode)
/* BLKmode aggregates of 1..8 bytes are retyped to SImode/DImode so
   they fit in a single integer register.  */
5947 if (bytes > 0 && bytes <= 8)
5948 mode = (bytes > 4 ? DImode : SImode);
5949 if (mode == BLKmode)
5953 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: normalize the mode, then route to
   the MS-x64, SysV-x64 or 32-bit helper.  */
5957 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5958 tree type, int named)
5960 enum machine_mode mode = omode;
5961 HOST_WIDE_INT bytes, words;
5963 if (mode == BLKmode)
5964 bytes = int_size_in_bytes (type);
5966 bytes = GET_MODE_SIZE (mode);
5967 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5969 /* To simplify the code below, represent vector types with a vector mode
5970 even if MMX/SSE are not active. */
5971 if (type && TREE_CODE (type) == VECTOR_TYPE)
5972 mode = type_natural_mode (type, cum);
5974 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5975 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5976 else if (TARGET_64BIT)
5977 return function_arg_64 (cum, mode, omode, type, named);
5979 return function_arg_32 (cum, mode, omode, type, bytes, words);
5982 /* A C expression that indicates when an argument must be passed by
5983 reference. If nonzero for an argument, a copy of that argument is
5984 made in memory and a pointer to the argument is passed instead of
5985 the argument itself. The pointer is passed in whatever way is
5986 appropriate for passing a pointer to that type. */
5989 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5990 enum machine_mode mode ATTRIBUTE_UNUSED,
5991 const_tree type, bool named ATTRIBUTE_UNUSED)
5993 /* See Windows x64 Software Convention. */
5994 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5996 int msize = (int) GET_MODE_SIZE (mode);
5999 /* Arrays are passed by reference. */
6000 if (TREE_CODE (type) == ARRAY_TYPE)
6003 if (AGGREGATE_TYPE_P (type))
6005 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6006 are passed by reference. */
6007 msize = int_size_in_bytes (type);
6011 /* __m128 is passed by reference. */
6013 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (int_size_in_bytes == -1) are
   passed by reference.  */
6019 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6025 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6028 contains_aligned_value_p (tree type)
6030 enum machine_mode mode = TYPE_MODE (type);
6031 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6035 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6037 if (TYPE_ALIGN (type) < 128)
6040 if (AGGREGATE_TYPE_P (type))
6042 /* Walk the aggregates recursively. */
6043 switch (TREE_CODE (type))
6047 case QUAL_UNION_TYPE:
6051 /* Walk all the structure fields. */
6052 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6054 if (TREE_CODE (field) == FIELD_DECL
6055 && contains_aligned_value_p (TREE_TYPE (field)))
6062 /* Just for use if some language passes arrays by value. */
6063 if (contains_aligned_value_p (TREE_TYPE (type)))
6074 /* Gives the alignment boundary, in bits, of an argument with the
6075 specified mode and type. */
6078 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6083 /* Since canonical type is used for call, we convert it to
6084 canonical type if needed. */
6085 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6086 type = TYPE_CANONICAL (type);
6087 align = TYPE_ALIGN (type);
6090 align = GET_MODE_ALIGNMENT (mode);
/* Never return an alignment below PARM_BOUNDARY.  */
6091 if (align < PARM_BOUNDARY)
6092 align = PARM_BOUNDARY;
6093 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6094 natural boundaries. */
6095 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6097 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6098 make an exception for SSE modes since these require 128bit
6101 The handling here differs from field_alignment. ICC aligns MMX
6102 arguments to 4 byte boundaries, while structure fields are aligned
6103 to 8 byte boundaries. */
6106 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6107 align = PARM_BOUNDARY;
6111 if (!contains_aligned_value_p (type))
6112 align = PARM_BOUNDARY;
/* Clamp to the largest alignment the target supports.  */
6115 if (align > BIGGEST_ALIGNMENT)
6116 align = BIGGEST_ALIGNMENT;
6120 /* Return true if N is a possible register number of function value. */
6123 ix86_function_value_regno_p (int regno)
6130 case FIRST_FLOAT_REG:
6131 /* TODO: The function should depend on current function ABI but
6132 builtins.c would need updating then. Therefore we use the
/* MS x64 ABI does not return values in x87 registers.  */
6134 if (TARGET_64BIT && ix86_abi == MS_ABI)
6136 return TARGET_FLOAT_RETURNS_IN_80387;
6142 if (TARGET_MACHO || TARGET_64BIT)
6150 /* Define how to find the value returned by a function.
6151 VALTYPE is the data type of the value (as a tree).
6152 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6153 otherwise, FUNC is 0. */
6156 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6157 const_tree fntype, const_tree fn)
6161 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6162 we normally prevent this case when mmx is not available. However
6163 some ABIs may require the result to be returned like DImode. */
6164 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6165 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6167 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6168 we prevent this case when sse is not available. However some ABIs
6169 may require the result to be returned like integer TImode. */
6170 else if (mode == TImode
6171 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6172 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6174 /* 32-byte vector modes in %ymm0. */
6175 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6176 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6178 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6179 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6180 regno = FIRST_FLOAT_REG;
6182 /* Most things go in %eax. */
6185 /* Override FP return register with %xmm0 for local functions when
6186 SSE math is enabled or for functions with sseregparm attribute. */
6187 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6189 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6190 if ((sse_level >= 1 && mode == SFmode)
6191 || (sse_level == 2 && mode == DFmode))
6192 regno = FIRST_SSE_REG;
6195 /* OImode shouldn't be used directly. */
6196 gcc_assert (mode != OImode);
6198 return gen_rtx_REG (orig_mode, regno);
/* Pick the return-value location under the x86-64 SysV ABI; general
   container layout is delegated to construct_container.  */
6202 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6207 /* Handle libcalls, which don't provide a type node. */
6208 if (valtype == NULL)
6220 return gen_rtx_REG (mode, FIRST_SSE_REG);
6223 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6227 return gen_rtx_REG (mode, AX_REG);
6231 ret = construct_container (mode, orig_mode, valtype, 1,
6232 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6233 x86_64_int_return_registers, 0);
6235 /* For zero sized structures, construct_container returns NULL, but we
6236 need to keep rest of compiler happy by returning meaningful value. */
6238 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Pick the return-value register under the Microsoft x64 ABI:
   default %rax, %xmm0 for scalar float and 16-byte non-complex
   integer/vector values.  */
6244 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6246 unsigned int regno = AX_REG;
6250 switch (GET_MODE_SIZE (mode))
6253 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6254 && !COMPLEX_MODE_P (mode))
6255 regno = FIRST_SSE_REG;
6259 if (mode == SFmode || mode == DFmode)
6260 regno = FIRST_SSE_REG;
6266 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve FNTYPE_OR_DECL to a decl/type pair and dispatch on ABI.  */
6270 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6271 enum machine_mode orig_mode, enum machine_mode mode)
6273 const_tree fn, fntype;
6276 if (fntype_or_decl && DECL_P (fntype_or_decl))
6277 fn = fntype_or_decl;
6278 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6280 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6281 return function_value_ms_64 (orig_mode, mode);
6282 else if (TARGET_64BIT)
6283 return function_value_64 (orig_mode, mode, valtype);
6285 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: return the RTX for a function's return
   value of type VALTYPE.  */
6289 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6290 bool outgoing ATTRIBUTE_UNUSED)
6292 enum machine_mode mode, orig_mode;
6294 orig_mode = TYPE_MODE (valtype);
6295 mode = type_natural_mode (valtype, NULL);
6296 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return-value location for a libcall (no type node available).  */
6300 ix86_libcall_value (enum machine_mode mode)
6302 return ix86_function_value_1 (NULL, NULL, mode, mode);
6305 /* Return true iff type is returned in memory. */
6307 static int ATTRIBUTE_UNUSED
6308 return_in_memory_32 (const_tree type, enum machine_mode mode)
6312 if (mode == BLKmode)
6315 size = int_size_in_bytes (type);
6317 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6320 if (VECTOR_MODE_P (mode) || mode == TImode)
6322 /* User-created vectors small enough to fit in EAX. */
6326 /* MMX/3dNow values are returned in MM0,
6327 except when it doesn't exist. */
6329 return (TARGET_MMX ? 0 : 1);
6331 /* SSE values are returned in XMM0, except when it doesn't exist. */
6333 return (TARGET_SSE ? 0 : 1);
6335 /* AVX values are returned in YMM0, except when it doesn't exist. */
6337 return TARGET_AVX ? 0 : 1;
6346 /* OImode shouldn't be used directly. */
6347 gcc_assert (mode != OImode);
/* x86-64 SysV: a value goes to memory exactly when examine_argument
   cannot classify it into registers.  */
6352 static int ATTRIBUTE_UNUSED
6353 return_in_memory_64 (const_tree type, enum machine_mode mode)
6355 int needed_intregs, needed_sseregs;
6356 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: only 1/2/4/8-byte values and 16-byte non-complex
   integer/vector values are returned in registers.  */
6359 static int ATTRIBUTE_UNUSED
6360 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6362 HOST_WIDE_INT size = int_size_in_bytes (type);
6364 /* __m128 is returned in xmm0. */
6365 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6366 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6369 /* Otherwise, the size must be exactly in [1248]. */
6370 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch on subtarget override,
   then call ABI (MS vs. SysV), then 32-bit rules.  */
6374 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6376 #ifdef SUBTARGET_RETURN_IN_MEMORY
6377 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6379 const enum machine_mode mode = type_natural_mode (type, NULL);
6383 if (ix86_function_type_abi (fntype) == MS_ABI)
6384 return return_in_memory_ms_64 (type, mode);
6386 return return_in_memory_64 (type, mode);
6389 return return_in_memory_32 (type, mode);
6393 /* Return false iff TYPE is returned in memory. This version is used
6394 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6395 but differs notably in that when MMX is available, 8-byte vectors
6396 are returned in memory, rather than in MMX registers. */
6399 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6402 enum machine_mode mode = type_natural_mode (type, NULL);
6405 return return_in_memory_64 (type, mode);
6407 if (mode == BLKmode)
6410 size = int_size_in_bytes (type);
6412 if (VECTOR_MODE_P (mode))
6414 /* Return in memory only if MMX registers *are* available. This
6415 seems backwards, but it is consistent with the existing
6422 else if (mode == TImode)
6424 else if (mode == XFmode)
6430 /* When returning SSE vector types, we have a choice of either
6431 (1) being abi incompatible with a -march switch, or
6432 (2) generating an error.
6433 Given no good solution, I think the safest thing is one warning.
6434 The user won't be able to use -Werror, but....
6436 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6437 called in response to actually generating a caller or callee that
6438 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6439 via aggregate_value_p for general type probing from tree-ssa. */
6442 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Each warning is issued at most once per compilation.  */
6444 static bool warnedsse, warnedmmx;
6446 if (!TARGET_64BIT && type)
6448 /* Look at the return type of the function, not the function type. */
6449 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6451 if (!TARGET_SSE && !warnedsse)
6454 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6457 warning (0, "SSE vector return without SSE enabled "
6462 if (!TARGET_MMX && !warnedmmx)
6464 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6467 warning (0, "MMX vector return without MMX enabled "
6477 /* Create the va_list data type. */
6479 /* Returns the calling convention specific va_list date type.
6480 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6483 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6485 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6487 /* For i386 we use plain pointer to argument area. */
6488 if (!TARGET_64BIT || abi == MS_ABI)
6489 return build_pointer_type (char_type_node);
/* SysV x86-64 va_list is a struct __va_list_tag with gp_offset,
   fp_offset, overflow_arg_area and reg_save_area fields.  */
6491 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6492 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6494 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6495 unsigned_type_node);
6496 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6497 unsigned_type_node);
6498 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6500 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6503 va_list_gpr_counter_field = f_gpr;
6504 va_list_fpr_counter_field = f_fpr;
6506 DECL_FIELD_CONTEXT (f_gpr) = record;
6507 DECL_FIELD_CONTEXT (f_fpr) = record;
6508 DECL_FIELD_CONTEXT (f_ovf) = record;
6509 DECL_FIELD_CONTEXT (f_sav) = record;
6511 TREE_CHAIN (record) = type_decl;
6512 TYPE_NAME (record) = type_decl;
6513 TYPE_FIELDS (record) = f_gpr;
6514 TREE_CHAIN (f_gpr) = f_fpr;
6515 TREE_CHAIN (f_fpr) = f_ovf;
6516 TREE_CHAIN (f_ovf) = f_sav;
6518 layout_type (record);
6520 /* The correct type is an array type of one element. */
6521 return build_array_type (record, build_index_type (size_zero_node));
6524 /* Setup the builtin va_list data type and for 64-bit the additional
6525 calling convention specific va_list data types. */
6528 ix86_build_builtin_va_list (void)
6530 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6532 /* Initialize abi specific va_list builtin types. */
/* Build the non-default ABI's va_list too, so __builtin_ms_va_list /
   __builtin_sysv_va_list both exist regardless of ix86_abi.  */
6536 if (ix86_abi == MS_ABI)
6538 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6539 if (TREE_CODE (t) != RECORD_TYPE)
6540 t = build_variant_type_copy (t);
6541 sysv_va_list_type_node = t;
6546 if (TREE_CODE (t) != RECORD_TYPE)
6547 t = build_variant_type_copy (t);
6548 sysv_va_list_type_node = t;
6550 if (ix86_abi != MS_ABI)
6552 t = ix86_build_builtin_va_list_abi (MS_ABI);
6553 if (TREE_CODE (t) != RECORD_TYPE)
6554 t = build_variant_type_copy (t);
6555 ms_va_list_type_node = t;
6560 if (TREE_CODE (t) != RECORD_TYPE)
6561 t = build_variant_type_copy (t);
6562 ms_va_list_type_node = t;
6569 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6572 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6581 int regparm = ix86_regparm;
6583 if (cum->call_abi != ix86_abi)
6584 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6586 /* GPR size of varargs save area. */
6587 if (cfun->va_list_gpr_size)
6588 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6590 ix86_varargs_gpr_size = 0;
6592 /* FPR size of varargs save area. We don't need it if we don't pass
6593 anything in SSE registers. */
6594 if (cum->sse_nregs && cfun->va_list_fpr_size)
6595 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6597 ix86_varargs_fpr_size = 0;
/* Nothing to save at all — skip emitting the save-area code.  */
6599 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6602 save_area = frame_pointer_rtx;
6603 set = get_varargs_alias_set ();
/* Spill the remaining named-argument GPRs into the register save
   area, one word per register.  */
6605 for (i = cum->regno;
6607 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6610 mem = gen_rtx_MEM (Pmode,
6611 plus_constant (save_area, i * UNITS_PER_WORD));
6612 MEM_NOTRAP_P (mem) = 1;
6613 set_mem_alias_set (mem, set);
6614 emit_move_insn (mem, gen_rtx_REG (Pmode,
6615 x86_64_int_parameter_registers[i]));
6618 if (ix86_varargs_fpr_size)
6620 /* Now emit code to save SSE registers. The AX parameter contains number
6621 of SSE parameter registers used to call this function. We use
6622 sse_prologue_save insn template that produces computed jump across
6623 SSE saves. We need some preparation work to get this working. */
6625 label = gen_label_rtx ();
6626 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6628 /* Compute address to jump to :
6629 label - eax*4 + nnamed_sse_arguments*4 Or
6630 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6631 tmp_reg = gen_reg_rtx (Pmode);
6632 nsse_reg = gen_reg_rtx (Pmode);
6633 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6634 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6635 gen_rtx_MULT (Pmode, nsse_reg,
6638 /* vmovaps is one byte longer than movaps. */
6640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6641 gen_rtx_PLUS (Pmode, tmp_reg,
6647 gen_rtx_CONST (DImode,
6648 gen_rtx_PLUS (DImode,
6650 GEN_INT (cum->sse_regno
6651 * (TARGET_AVX ? 5 : 4)))));
6653 emit_move_insn (nsse_reg, label_ref);
6654 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6656 /* Compute address of memory block we save into. We always use pointer
6657 pointing 127 bytes after first byte to store - this is needed to keep
6658 instruction size limited by 4 bytes (5 bytes for AVX) with one
6659 byte displacement. */
6660 tmp_reg = gen_reg_rtx (Pmode);
6661 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6662 plus_constant (save_area,
6663 ix86_varargs_gpr_size + 127)));
6664 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6665 MEM_NOTRAP_P (mem) = 1;
6666 set_mem_alias_set (mem, set);
6667 set_mem_align (mem, BITS_PER_WORD);
6669 /* And finally do the dirty job! */
6670 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6671 GEN_INT (cum->sse_regno), label));
/* MS-x64 varargs prologue: spill the remaining parameter registers
   into their home slots in the incoming argument area.  */
6676 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6678 alias_set_type set = get_varargs_alias_set ();
6681 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6685 mem = gen_rtx_MEM (Pmode,
6686 plus_constant (virtual_incoming_args_rtx,
6687 i * UNITS_PER_WORD));
6688 MEM_NOTRAP_P (mem) = 1;
6689 set_mem_alias_set (mem, set);
6691 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6692 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument, then dispatch to the ABI-specific worker.  */
6697 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6698 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6701 CUMULATIVE_ARGS next_cum;
6704 /* This argument doesn't appear to be used anymore. Which is good,
6705 because the old code here didn't suppress rtl generation. */
6706 gcc_assert (!no_rtl);
6711 fntype = TREE_TYPE (current_function_decl);
6713 /* For varargs, we do not want to skip the dummy va_dcl argument.
6714 For stdargs, we do want to skip the last named argument. */
6716 if (stdarg_p (fntype))
6717 function_arg_advance (&next_cum, mode, type, 1);
6719 if (cum->call_abi == MS_ABI)
6720 setup_incoming_varargs_ms_64 (&next_cum);
6722 setup_incoming_varargs_64 (&next_cum);
6725 /* Checks if TYPE is of kind va_list char *. */
6728 is_va_list_char_pointer (tree type)
6732 /* For 32-bit it is always true. */
/* 64-bit: compare against the canonical MS va_list type.  */
6735 canonic = ix86_canonical_va_list_type (type);
6736 return (canonic == ms_va_list_type_node
6737 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6740 /* Implement va_start.  Initializes the four fields of the SysV x86-64
   va_list record: gp_offset, fp_offset, overflow_arg_area and
   reg_save_area, based on how many register slots the named arguments
   consumed.  NOTE(review): listing elided; braces and a few statements
   between visible lines are missing from this view.  */
6743 ix86_va_start (tree valist, rtx nextarg)
6745 HOST_WIDE_INT words, n_gpr, n_fpr;
6746 tree f_gpr, f_fpr, f_ovf, f_sav;
6747 tree gpr, fpr, ovf, sav, t;
6750 /* Only 64bit target needs something special.  */
6751 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6753 std_expand_builtin_va_start (valist, nextarg);
/* Walk the four fields of the sysv va_list record type in declaration
   order: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6757 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6758 f_fpr = TREE_CHAIN (f_gpr);
6759 f_ovf = TREE_CHAIN (f_fpr);
6760 f_sav = TREE_CHAIN (f_ovf);
6762 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6763 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6764 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6765 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6766 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6768 /* Count number of gp and fp argument registers used.  */
6769 words = crtl->args.info.words;
6770 n_gpr = crtl->args.info.regno;
6771 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = bytes of GP save area already consumed (8 per register).  */
6773 if (cfun->va_list_gpr_size)
6775 type = TREE_TYPE (gpr);
6776 t = build2 (MODIFY_EXPR, type,
6777 gpr, build_int_cst (type, n_gpr * 8));
6778 TREE_SIDE_EFFECTS (t) = 1;
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts after the whole GP area; each SSE slot is 16 bytes.  */
6782 if (TARGET_SSE && cfun->va_list_fpr_size)
6784 type = TREE_TYPE (fpr);
6785 t = build2 (MODIFY_EXPR, type, fpr,
6786 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6787 TREE_SIDE_EFFECTS (t) = 1;
6788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791 /* Find the overflow area.  */
6792 type = TREE_TYPE (ovf);
6793 t = make_tree (type, crtl->args.internal_arg_pointer);
6795 t = build2 (POINTER_PLUS_EXPR, type, t,
6796 size_int (words * UNITS_PER_WORD));
6797 t = build2 (MODIFY_EXPR, type, ovf, t);
6798 TREE_SIDE_EFFECTS (t) = 1;
6799 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6801 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6803 /* Find the register save area.
6804 Prologue of the function saves it right above stack frame.  */
6805 type = TREE_TYPE (sav);
6806 t = make_tree (type, frame_pointer_rtx);
/* When no GP registers were saved, the save area holds only the SSE
   part, so bias the pointer back past the (absent) GP region.  */
6807 if (!ix86_varargs_gpr_size)
6808 t = build2 (POINTER_PLUS_EXPR, type, t,
6809 size_int (-8 * X86_64_REGPARM_MAX));
6810 t = build2 (MODIFY_EXPR, type, sav, t);
6811 TREE_SIDE_EFFECTS (t) = 1;
6812 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6816 /* Implement va_arg.  Gimplifies the fetch of one argument of TYPE from
   a SysV x86-64 va_list: try the register save area first (emitting a
   runtime gp_offset/fp_offset bounds check), fall back to the overflow
   (stack) area otherwise.  Emitted statements go to PRE_P.
   NOTE(review): listing elided — braces, several declarations and some
   statements between the visible lines are missing from this view.  */
6819 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6822 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6823 tree f_gpr, f_fpr, f_ovf, f_sav;
6824 tree gpr, fpr, ovf, sav, t;
6826 tree lab_false, lab_over = NULL_TREE;
6831 enum machine_mode nat_mode;
6834 /* Only 64bit target needs something special.  */
6835 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6836 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6838 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6839 f_fpr = TREE_CHAIN (f_gpr);
6840 f_ovf = TREE_CHAIN (f_fpr);
6841 f_sav = TREE_CHAIN (f_ovf);
6843 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6844 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6845 valist = build_va_arg_indirect_ref (valist);
6846 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6847 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6848 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by invisible reference are fetched as a pointer.  */
6850 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6852 type = build_pointer_type (type);
6853 size = int_size_in_bytes (type);
6854 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6856 nat_mode = type_natural_mode (type, NULL);
6865 /* Unnamed 256bit vector mode parameters are passed on stack.  */
6866 if (ix86_cfun_abi () == SYSV_ABI)
/* CONTAINER describes which registers (if any) would hold this
   argument; NULL means it lives entirely in the overflow area.  */
6873 container = construct_container (nat_mode, TYPE_MODE (type),
6874 type, 0, X86_64_REGPARM_MAX,
6875 X86_64_SSE_REGPARM_MAX, intreg,
6880 /* Pull the value out of the saved registers.  */
6882 addr = create_tmp_var (ptr_type_node, "addr");
6883 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6887 int needed_intregs, needed_sseregs;
6889 tree int_addr, sse_addr;
6891 lab_false = create_artificial_label ();
6892 lab_over = create_artificial_label ();
6894 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary copy is required when the value's alignment exceeds
   what the save-area slots guarantee.  */
6896 need_temp = (!REG_P (container)
6897 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6898 || TYPE_ALIGN (type) > 128));
6900 /* In case we are passing structure, verify that it is consecutive block
6901 on the register save area.  If not we need to do moves.  */
6902 if (!need_temp && !REG_P (container))
6904 /* Verify that all registers are strictly consecutive  */
6905 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6909 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6911 rtx slot = XVECEXP (container, 0, i);
6912 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6913 || INTVAL (XEXP (slot, 1)) != i * 16)
6921 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6923 rtx slot = XVECEXP (container, 0, i);
6924 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6925 || INTVAL (XEXP (slot, 1)) != i * 8)
6937 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6938 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6939 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6940 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6943 /* First ensure that we fit completely in registers.  */
/* if (gp_offset >= (MAX - needed + 1) * 8) goto lab_false;  i.e. not
   enough GP save-area slots remain for this argument.  */
6946 t = build_int_cst (TREE_TYPE (gpr),
6947 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6948 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6949 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6950 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6951 gimplify_and_add (t, pre_p);
/* Same bounds check for the SSE part of the save area.  */
6955 t = build_int_cst (TREE_TYPE (fpr),
6956 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6957 + X86_64_REGPARM_MAX * 8);
6958 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6959 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6960 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6961 gimplify_and_add (t, pre_p);
6964 /* Compute index to start of area used for integer regs.  */
6967 /* int_addr = gpr + sav; */
6968 t = fold_convert (sizetype, gpr);
6969 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6970 gimplify_assign (int_addr, t, pre_p);
6974 /* sse_addr = fpr + sav; */
6975 t = fold_convert (sizetype, fpr);
6976 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6977 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy the argument piecewise from the save area into
   an aligned temporary and point ADDR at it.  */
6982 tree temp = create_tmp_var (type, "va_arg_tmp");
6985 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6986 gimplify_assign (addr, t, pre_p);
6988 for (i = 0; i < XVECLEN (container, 0); i++)
6990 rtx slot = XVECEXP (container, 0, i);
6991 rtx reg = XEXP (slot, 0);
6992 enum machine_mode mode = GET_MODE (reg);
6993 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6994 tree addr_type = build_pointer_type (piece_type);
6995 tree daddr_type = build_pointer_type_for_mode (piece_type,
6999 tree dest_addr, dest;
7001 if (SSE_REGNO_P (REGNO (reg)))
7003 src_addr = sse_addr;
7004 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7008 src_addr = int_addr;
7009 src_offset = REGNO (reg) * 8;
7011 src_addr = fold_convert (addr_type, src_addr);
7012 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7013 size_int (src_offset));
7014 src = build_va_arg_indirect_ref (src_addr);
7016 dest_addr = fold_convert (daddr_type, addr);
7017 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7018 size_int (INTVAL (XEXP (slot, 1))));
7019 dest = build_va_arg_indirect_ref (dest_addr);
7021 gimplify_assign (dest, src, pre_p);
/* Advance gp_offset / fp_offset past the consumed save-area slots.  */
7027 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7028 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7029 gimplify_assign (gpr, t, pre_p);
7034 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7035 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7036 gimplify_assign (fpr, t, pre_p);
7039 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7041 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7044 /* ... otherwise out of the overflow area.  */
7046 /* When we align parameter on stack for caller, if the parameter
7047 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7048 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7049 here with caller.  */
7050 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7051 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7052 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7054 /* Care for on-stack alignment if needed.  */
7055 if (arg_boundary <= 64
7056 || integer_zerop (TYPE_SIZE (type)))
/* Round OVF up to the argument's boundary: (ovf + align-1) & -align.  */
7060 HOST_WIDE_INT align = arg_boundary / 8;
7061 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7062 size_int (align - 1));
7063 t = fold_convert (sizetype, t);
7064 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7066 t = fold_convert (TREE_TYPE (ovf), t);
7068 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7069 gimplify_assign (addr, t, pre_p);
/* Bump the overflow pointer past the argument's rounded size.  */
7071 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7072 size_int (rsize * UNITS_PER_WORD));
7073 gimplify_assign (unshare_expr (ovf), t, pre_p);
7076 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7078 ptrtype = build_pointer_type (type);
7079 addr = fold_convert (ptrtype, addr);
/* An extra dereference for by-reference arguments (indirect_p path).  */
7082 addr = build_va_arg_indirect_ref (addr);
7083 return build_va_arg_indirect_ref (addr);
7086 /* Return nonzero if OPNUM's MEM should be matched
7087 in movabs* patterns.  Rejects volatile memory unless volatile
   operands are globally permitted (volatile_ok).  */
7090 ix86_check_movabs (rtx insn, int opnum)
/* For a PARALLEL (e.g. a set plus clobbers) look at the first element,
   which must be the SET whose operand OPNUM we inspect.  */
7094 set = PATTERN (insn);
7095 if (GET_CODE (set) == PARALLEL)
7096 set = XVECEXP (set, 0, 0);
7097 gcc_assert (GET_CODE (set) == SET);
7098 mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
7099 while (GET_CODE (mem) == SUBREG)
7100 mem = SUBREG_REG (mem);
7101 gcc_assert (MEM_P (mem));
7102 return (volatile_ok || !MEM_VOLATILE_P (mem));
7105 /* Initialize the table of extra 80387 mathematical constants — the
   values loadable by the dedicated x87 instructions fldlg2, fldln2,
   fldl2e, fldl2t and fldpi.  Fills ext_80387_constants_table and sets
   the ext_80387_constants_init flag so this runs only once.  */
7108 init_ext_80387_constants (void)
7110 static const char * cst[5] =
7112 "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
7113 "0.6931471805599453094286904741849753009", /* 1: fldln2  */
7114 "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
7115 "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
7116 "3.1415926535897932385128089594061862044", /* 4: fldpi   */
7120 for (i = 0; i < 5; i++)
7122 real_from_string (&ext_80387_constants_table[i], cst[i]);
7123 /* Ensure each constant is rounded to XFmode precision.  */
7124 real_convert (&ext_80387_constants_table[i],
7125 XFmode, &ext_80387_constants_table[i]);
7128 ext_80387_constants_init = 1;
7131 /* Return true if the constant is something that can be loaded with
7132 a special instruction.  Returns a small positive code identifying
   which x87 load instruction applies (fldz, fld1, the fldlg2 family,
   or the negated fldz/fld1 split forms); see the elided return values.
   NOTE(review): listing elided; the exact return codes and some early
   declarations are not visible here.  */
7135 standard_80387_constant_p (rtx x)
7137 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs qualify.  */
7141 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7144 if (x == CONST0_RTX (mode))
7146 if (x == CONST1_RTX (mode))
7149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7151 /* For XFmode constants, try to find a special 80387 instruction when
7152 optimizing for size or on those CPUs that benefit from them.  */
7154 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7158 if (! ext_80387_constants_init)
7159 init_ext_80387_constants ();
7161 for (i = 0; i < 5; i++)
7162 if (real_identical (&r, &ext_80387_constants_table[i]))
7166 /* Load of the constant -0.0 or -1.0 will be split as
7167 fldz;fchs or fld1;fchs sequence.  */
7168 if (real_isnegzero (&r))
7170 if (real_identical (&r, &dconstm1))
7176 /* Return the opcode of the special instruction to be used to load
   the constant X (the assembler mnemonic string, e.g. "fldz"/"fld1").
   Dispatches on the code returned by standard_80387_constant_p.
   NOTE(review): the switch cases are elided from this listing.  */
7180 standard_80387_constant_opcode (rtx x)
7182 switch (standard_80387_constant_p (x))
7206 /* Return the CONST_DOUBLE representing the 80387 constant that is
7207 loaded by the specified special instruction.  The argument IDX
7208 matches the return value from standard_80387_constant_p.
   NOTE(review): the IDX->table-index mapping lines are elided here.  */
7211 standard_80387_constant_rtx (int idx)
/* Lazily build the constants table on first use.  */
7215 if (! ext_80387_constants_init)
7216 init_ext_80387_constants ();
7232 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7236 /* Return 1 if mode is a valid mode for sse.
   NOTE(review): the body (presumably a switch over the 128-bit SSE
   vector modes) is entirely elided from this listing.  */
7238 standard_sse_mode_p (enum machine_mode mode)
7255 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7256 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7257 modes and AVX is enabled.  (Negative codes mark constants that
   would need an ISA extension that is not enabled.)  */
7260 standard_sse_constant_p (rtx x)
7262 enum machine_mode mode = GET_MODE (x);
/* The all-zero constant is loadable with a plain xor in any SSE mode.  */
7264 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones needs pcmpeqd, available only with SSE2 (128-bit) or
   AVX (256-bit).  */
7266 if (vector_all_ones_operand (x, mode))
7268 if (standard_sse_mode_p (mode))
7269 return TARGET_SSE2 ? 2 : -2;
7270 else if (VALID_AVX256_REG_MODE (mode))
7271 return TARGET_AVX ? 3 : -3;
7277 /* Return the opcode of the special instruction to be used to load
   the SSE constant X into the INSN's output operand: a {v}xorps /
   {v}xorpd / {v}pxor for all-zeros, or a {v}pcmpeqd for all-ones,
   chosen by the insn's mode attribute.
   NOTE(review): the case labels and the default gcc_unreachable paths
   are elided from this listing.  */
7281 standard_sse_constant_opcode (rtx insn, rtx x)
7283 switch (standard_sse_constant_p (x))
7286 switch (get_attr_mode (insn))
7289 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7291 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7293 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7295 return "vxorps\t%x0, %x0, %x0";
7297 return "vxorpd\t%x0, %x0, %x0";
7299 return "vpxor\t%x0, %x0, %x0";
7305 switch (get_attr_mode (insn))
7310 return "vpcmpeqd\t%0, %0, %0";
7316 return "pcmpeqd\t%0, %0";
7321 /* Returns 1 if OP contains a symbol reference — a SYMBOL_REF or
   LABEL_REF anywhere in the rtx, found by recursive traversal of
   expression ('e') and vector ('E') operands.  */
7324 symbolic_reference_mentioned_p (rtx op)
7329 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7332 fmt = GET_RTX_FORMAT (GET_CODE (op));
7333 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors: recurse into every element.  */
7339 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7340 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7344 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7351 /* Return 1 if it is appropriate to emit `ret' instructions in the
7352 body of a function.  Do this only if the epilogue is simple, needing a
7353 couple of insns.  Prior to reloading, we can't tell how many registers
7354 must be saved, so return 0 then.  Return 0 if there is no frame
7355 marker to de-allocate.  */
7358 ix86_can_use_return_insn_p (void)
7360 struct ix86_frame frame;
7362 if (! reload_completed || frame_pointer_needed)
7365 /* Don't allow more than 32 pop, since that's all we can do
7366 with one instruction.  */
7367 if (crtl->args.pops_args
7368 && crtl->args.size >= 32768)
/* A bare `ret' works only when the epilogue has nothing to do: no
   local stack to release and no saved registers to restore.  */
7371 ix86_compute_frame_layout (&frame);
7372 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7375 /* Value should be nonzero if functions must have frame pointers.
7376 Zero means the frame pointer need not be set up (and parms may
7377 be accessed via the stack pointer) in functions that seem suitable.  */
7380 ix86_frame_pointer_required (void)
7382 /* If we accessed previous frames, then the generated code expects
7383 to be able to access the saved ebp value in our frame.  */
7384 if (cfun->machine->accesses_prev_frame)
7387 /* Several x86 os'es need a frame pointer for other reasons,
7388 usually pertaining to setjmp.  */
7389 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7392 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7393 the frame pointer by default.  Turn it back on now if we've not
7394 got a leaf function.  */
7395 if (TARGET_OMIT_LEAF_FRAME_POINTER
7396 && (!current_function_is_leaf
7397 || ix86_current_function_calls_tls_descriptor))
7406 /* Record that the current function accesses previous call frames.
   Setting this flag forces ix86_frame_pointer_required to keep a
   frame pointer (see the check above).  */
7409 ix86_setup_frame_addresses (void)
7411 cfun->machine->accesses_prev_frame = 1;
7414 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7415 # define USE_HIDDEN_LINKONCE 1
7417 # define USE_HIDDEN_LINKONCE 0
7420 static int pic_labels_used;
7422 /* Fills in the label name that should be used for a pc thunk for
7423 the given register.  NAME must have room for 32 characters.  Uses
   the "__i686.get_pc_thunk.<reg>" public name when hidden linkonce
   sections are available, otherwise a local "LPR<regno>" label.  */
7426 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks exist only for 32-bit PIC; x86-64 uses RIP-relative
   addressing instead.  */
7428 gcc_assert (!TARGET_64BIT);
7430 if (USE_HIDDEN_LINKONCE)
7431 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7433 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7437 /* This function generates code for -fpic that loads %ebx with
7438 the return address of the caller and then returns.  Emitted at
   end-of-file for every register whose bit is set in pic_labels_used.
   NOTE(review): listing elided; some branch/brace lines between the
   Darwin, hidden-linkonce and plain-text-section paths are missing.  */
7441 ix86_file_end (void)
7446 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was ever requested.  */
7450 if (! ((pic_labels_used >> regno) & 1))
7453 get_pc_thunk_name (name, regno);
/* Darwin path: emit the thunk as a weak, private-extern coalesced
   symbol.  */
7458 switch_to_section (darwin_sections[text_coal_section]);
7459 fputs ("\t.weak_definition\t", asm_out_file);
7460 assemble_name (asm_out_file, name);
7461 fputs ("\n\t.private_extern\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n", asm_out_file);
7464 ASM_OUTPUT_LABEL (asm_out_file, name);
7468 if (USE_HIDDEN_LINKONCE)
7472 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7474 TREE_PUBLIC (decl) = 1;
7475 TREE_STATIC (decl) = 1;
7476 DECL_ONE_ONLY (decl) = 1;
7478 (*targetm.asm_out.unique_section) (decl, 0);
7479 switch_to_section (get_named_section (decl, NULL, 0));
7481 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7482 fputs ("\t.hidden\t", asm_out_file);
7483 assemble_name (asm_out_file, name);
7484 fputc ('\n', asm_out_file);
7485 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7489 switch_to_section (text_section);
7490 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg; ret — loads the return address.  */
7493 xops[0] = gen_rtx_REG (Pmode, regno);
7494 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7495 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7496 output_asm_insn ("ret", xops);
7499 if (NEED_INDICATE_EXEC_STACK)
7500 file_end_indicate_exec_stack ();
7503 /* Emit code for the SET_GOT patterns.  Produces the assembly that
   loads the GOT base address into DEST, by one of three strategies:
   the VxWorks RTP GOTT tables, an inline call/pop (when deep branch
   prediction is off or not PIC), or a call to a pc thunk.
   NOTE(review): listing elided; early declarations and several
   brace/else lines between the paths are missing from this view.  */
7506 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7512 if (TARGET_VXWORKS_RTP && flag_pic)
7514 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7515 xops[2] = gen_rtx_MEM (Pmode,
7516 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7517 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7519 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7520 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7521 an unadorned address.  */
7522 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7523 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7524 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7528 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or outside PIC) a call/pop pair is
   cheaper than going through a thunk.  */
7530 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7532 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
7535 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7537 output_asm_insn ("call\t%a2", xops);
7540 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7541 is what will be referenced by the Mach-O PIC subsystem.  */
7543 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7546 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7547 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7550 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk path: call __i686.get_pc_thunk.<reg> and record that the thunk
   must be emitted at end of file (see ix86_file_end).  */
7555 get_pc_thunk_name (name, REGNO (dest));
7556 pic_labels_used |= 1 << REGNO (dest);
7558 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7559 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7560 output_asm_insn ("call\t%X2", xops);
7561 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7562 is what will be referenced by the Mach-O PIC subsystem.  */
7565 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7567 targetm.asm_out.internal_label (asm_out_file, "L",
7568 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to the loaded PC.  */
7575 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7576 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7578 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7583 /* Generate a "push" pattern for input ARG: a SET that stores ARG to
   memory at the pre-decremented stack pointer.
   NOTE(review): the function's declaration line is elided from this
   listing; only the return expression is visible.  */
7588 return gen_rtx_SET (VOIDmode,
7590 gen_rtx_PRE_DEC (Pmode,
7591 stack_pointer_rtx)),
7595 /* Return >= 0 if there is an unused call-clobbered register available
7596 for the entire function.  Only considered for leaf functions with no
   profiling and no TLS-descriptor calls; returns INVALID_REGNUM when
   no such register exists.  */
7599 ix86_select_alt_pic_regnum (void)
7601 if (current_function_is_leaf && !crtl->profile
7602 && !ix86_current_function_calls_tls_descriptor)
7605 /* Can't use the same register for both PIC and DRAP.  */
7607 drap = REGNO (crtl->drap_reg);
/* Scan %eax/%ecx/%edx (regnos 2..0) for one never used in this
   function.  */
7610 for (i = 2; i >= 0; --i)
7611 if (i != drap && !df_regs_ever_live_p (i))
7615 return INVALID_REGNUM;
7618 /* Return 1 if we need to save REGNO in the prologue.  MAYBE_EH_RETURN
   additionally counts the EH return data registers as needing a save.
   NOTE(review): listing elided; some branch bodies between the visible
   lines are missing from this view.  */
7620 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is live across the function,
   unless an unused call-clobbered register can carry the GOT pointer
   instead (see ix86_select_alt_pic_regnum).  */
7622 if (pic_offset_table_rtx
7623 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7624 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7626 || crtl->calls_eh_return
7627 || crtl->uses_const_pool))
7629 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7634 if (crtl->calls_eh_return && maybe_eh_return)
7639 unsigned test = EH_RETURN_DATA_REGNO (i);
7640 if (test == INVALID_REGNUM)
/* The DRAP register is saved separately by the realignment code.  */
7648 && regno == REGNO (crtl->drap_reg))
/* Default rule: save a register iff it is used, callee-saved, not
   fixed, and not the frame pointer when one is being set up anyway.  */
7651 return (df_regs_ever_live_p (regno)
7652 && !call_used_regs[regno]
7653 && !fixed_regs[regno]
7654 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7657 /* Return number of saved general purpose registers.  */
7660 ix86_nsaved_regs (void)
7665 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7666 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7671 /* Return number of saved SSE registers.  Only nonzero for the MS ABI,
   which makes xmm6-xmm15 callee-saved.  */
7674 ix86_nsaved_sseregs (void)
7679 if (ix86_cfun_abi () != MS_ABI)
7681 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7682 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7687 /* Given FROM and TO register numbers, say whether this elimination is
7688 allowed.  If stack alignment is needed, we can only replace argument
7689 pointer with hard frame pointer, or replace frame pointer with stack
7690 pointer.  Otherwise, frame pointer elimination is automatically
7691 handled and all other eliminations are valid.  */
7694 ix86_can_eliminate (int from, int to)
7696 if (stack_realign_fp)
7697 return ((from == ARG_POINTER_REGNUM
7698 && to == HARD_FRAME_POINTER_REGNUM)
7699 || (from == FRAME_POINTER_REGNUM
7700 && to == STACK_POINTER_REGNUM));
/* Eliminating to the stack pointer is only valid when no frame
   pointer is required; everything else is always fine.  */
7702 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7705 /* Return the offset between two registers, one to be eliminated, and the other
7706 its replacement, at the start of a routine.  Computed from the frame
   layout; see ix86_compute_frame_layout for the offset definitions.  */
7709 ix86_initial_elimination_offset (int from, int to)
7711 struct ix86_frame frame;
7712 ix86_compute_frame_layout (&frame);
7714 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7715 return frame.hard_frame_pointer_offset;
7716 else if (from == FRAME_POINTER_REGNUM
7717 && to == HARD_FRAME_POINTER_REGNUM)
7718 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* The remaining legal target is the stack pointer.  */
7721 gcc_assert (to == STACK_POINTER_REGNUM);
7723 if (from == ARG_POINTER_REGNUM)
7724 return frame.stack_pointer_offset;
7726 gcc_assert (from == FRAME_POINTER_REGNUM);
7727 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7731 /* In a dynamically-aligned function, we can't know the offset from
7732 stack pointer to frame pointer, so we must ensure that setjmp
7733 eliminates fp against the hard fp (%ebp) rather than trying to
7734 index from %esp up to the top of the frame across a gap that is
7735 of unknown (at compile-time) size.  */
7737 ix86_builtin_setjmp_frame_value (void)
7739 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7742 /* Fill structure ix86_frame about frame of currently computed function.
   Lays out, from the return address downward: saved GP registers,
   aligned SSE save area, varargs register save area, aligned locals,
   outgoing argument area, final alignment padding — and then carves
   the red zone out of to_allocate where permitted.
   NOTE(review): listing elided; several brace/else lines and the
   debug-dump guard are missing from this view.  */
7745 ix86_compute_frame_layout (struct ix86_frame *frame)
7747 HOST_WIDE_INT total_size;
7748 unsigned int stack_alignment_needed;
7749 HOST_WIDE_INT offset;
7750 unsigned int preferred_alignment;
7751 HOST_WIDE_INT size = get_frame_size ();
7753 frame->nregs = ix86_nsaved_regs ();
7754 frame->nsseregs = ix86_nsaved_sseregs ();
7757 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7758 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7760 /* MS ABI seem to require stack alignment to be always 16 except for function
7762 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7764 preferred_alignment = 16;
7765 stack_alignment_needed = 16;
7766 crtl->preferred_stack_boundary = 128;
7767 crtl->stack_alignment_needed = 128;
7770 gcc_assert (!size || stack_alignment_needed);
7771 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7772 gcc_assert (preferred_alignment <= stack_alignment_needed);
7774 /* During reload iteration the amount of registers saved can change.
7775 Recompute the value as needed.  Do not recompute when amount of registers
7776 didn't change as reload does multiple calls to the function and does not
7777 expect the decision to change within single iteration.  */
7778 if (!optimize_function_for_size_p (cfun)
7779 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7781 int count = frame->nregs;
7783 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7784 /* The fast prologue uses move instead of push to save registers.  This
7785 is significantly longer, but also executes faster as modern hardware
7786 can execute the moves in parallel, but can't do that for push/pop.
7788 Be careful about choosing what prologue to emit:  When function takes
7789 many instructions to execute we may use slow version as well as in
7790 case function is known to be outside hot spot (this is known with
7791 feedback only).  Weight the size of function by number of registers
7792 to save as it is cheap to use one or two push instructions but very
7793 slow to use many of them.  */
7795 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7796 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7797 || (flag_branch_probabilities
7798 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7799 cfun->machine->use_fast_prologue_epilogue = false;
7801 cfun->machine->use_fast_prologue_epilogue
7802 = !expensive_function_p (count);
7804 if (TARGET_PROLOGUE_USING_MOVE
7805 && cfun->machine->use_fast_prologue_epilogue)
7806 frame->save_regs_using_mov = true;
7808 frame->save_regs_using_mov = false;
7811 /* Skip return address and saved base pointer.  */
7812 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7814 frame->hard_frame_pointer_offset = offset;
7816 /* Set offset to aligned because the realigned frame starts from
7818 if (stack_realign_fp)
7819 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7821 /* Register save area  */
7822 offset += frame->nregs * UNITS_PER_WORD;
7824 /* Align SSE reg save area.  */
7825 if (frame->nsseregs)
7826 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7828 frame->padding0 = 0;
7830 /* SSE register save area.  */
7831 offset += frame->padding0 + frame->nsseregs * 16;
7834 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7835 offset += frame->va_arg_size;
7837 /* Align start of frame for local function.  */
7838 frame->padding1 = ((offset + stack_alignment_needed - 1)
7839 & -stack_alignment_needed) - offset;
7841 offset += frame->padding1;
7843 /* Frame pointer points here.  */
7844 frame->frame_pointer_offset = offset;
7848 /* Add outgoing arguments area.  Can be skipped if we eliminated
7849 all the function calls as dead code.
7850 Skipping is however impossible when function calls alloca.  Alloca
7851 expander assumes that last crtl->outgoing_args_size
7852 of stack frame are unused.  */
7853 if (ACCUMULATE_OUTGOING_ARGS
7854 && (!current_function_is_leaf || cfun->calls_alloca
7855 || ix86_current_function_calls_tls_descriptor))
7857 offset += crtl->outgoing_args_size;
7858 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7861 frame->outgoing_arguments_size = 0;
7863 /* Align stack boundary.  Only needed if we're calling another function
7865 if (!current_function_is_leaf || cfun->calls_alloca
7866 || ix86_current_function_calls_tls_descriptor)
7867 frame->padding2 = ((offset + preferred_alignment - 1)
7868 & -preferred_alignment) - offset;
7870 frame->padding2 = 0;
7872 offset += frame->padding2;
7874 /* We've reached end of stack frame.  */
7875 frame->stack_pointer_offset = offset;
7877 /* Size prologue needs to allocate.  */
7878 frame->to_allocate =
7879 (size + frame->padding1 + frame->padding2
7880 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny or huge frames fall back to push/pop saving: pushes are smaller
   for tiny frames, and >2GB offsets don't fit a 32-bit displacement.  */
7882 if ((!frame->to_allocate && frame->nregs <= 1)
7883 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7884 frame->save_regs_using_mov = false;
/* Red zone: leaf functions on non-MS 64-bit targets may use the 128
   bytes below %rsp without allocating them.  */
7886 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7887 && current_function_is_leaf
7888 && !ix86_current_function_calls_tls_descriptor)
7890 frame->red_zone_size = frame->to_allocate;
7891 if (frame->save_regs_using_mov)
7892 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7893 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7894 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7897 frame->red_zone_size = 0;
7898 frame->to_allocate -= frame->red_zone_size;
7899 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard line elided in listing).  */
7901 fprintf (stderr, "\n");
7902 fprintf (stderr, "size: %ld\n", (long)size);
7903 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7904 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7905 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7906 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7907 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7908 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7909 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7910 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7911 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7912 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7913 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7914 (long)frame->hard_frame_pointer_offset);
7915 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7916 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7917 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7918 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7922 /* Emit code to save registers in the prologue using push instructions.
   Iterates in descending regno order so restores can pop in ascending
   order; each push is marked frame-related for unwind info.  */
7925 ix86_emit_save_regs (void)
7930 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7931 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7933 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7934 RTX_FRAME_RELATED_P (insn) = 1;
7938 /* Emit code to save registers using MOV insns.  First register
7939 is saved to POINTER + OFFSET; subsequent ones at successive
   word offsets.  Each store is marked frame-related for unwind info.  */
7941 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7946 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7947 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7949 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7951 gen_rtx_REG (Pmode, regno));
7952 RTX_FRAME_RELATED_P (insn) = 1;
7953 offset += UNITS_PER_WORD;
7957 /* Emit code to save SSE registers using MOV insns.  First register
7958 is saved to POINTER + OFFSET; each TImode store is 16-byte aligned
   and marked frame-related for unwind info.  */
7960 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7966 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7967 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7969 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7970 set_mem_align (mem, 128);
7971 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7972 RTX_FRAME_RELATED_P (insn) = 1;
7977 /* Expand prologue or epilogue stack adjustment.
7978 The pattern exist to put a dependency on all ebp-based memory accesses.
7979 STYLE should be negative if instructions should be marked as frame related,
7980 zero if %r11 register is live and cannot be freely used and positive
   otherwise (the %r11-scratch path below may then be taken for offsets
   that do not fit a 32-bit immediate).
   NOTE(review): listing elided; the TARGET_64BIT branch condition and
   some guard lines around the visible statements are missing.  */
7984 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7989 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7990 else if (x86_64_immediate_operand (offset, DImode))
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7995 /* r11 is used by indirect sibcall return as well, set before the
7996 epilogue and used after the epilogue.  ATM indirect sibcall
7997 shouldn't be used together with huge frame sizes in one
7998 function because of the frame_size check in sibcall.c.  */
/* Materialize the over-wide offset in %r11, then adjust through it.  */
8000 r11 = gen_rtx_REG (DImode, R11_REG);
8001 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8003 RTX_FRAME_RELATED_P (insn) = 1;
8004 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8008 RTX_FRAME_RELATED_P (insn) = 1;
8011 /* Find an available register to be used as dynamic realign argument
8012 pointer register.  Such a register will be written in prologue and
8013 used in begin of body, so it must not be
8014 1. parameter passing register.
8016 We reuse static-chain register if it is available.  Otherwise, we
8017 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8020 Return: the regno of chosen register.  */
8023 find_drap_reg (void)
8025 tree decl = cfun->decl;
/* The two near-identical tests below pick the conservative fallback
   register on the two target variants (R13 on the x86-64 path, DI on
   the i386 path; the guarding target check is elided here --
   presumably TARGET_64BIT, TODO confirm against full source).  */
8029 /* Use R13 for nested function or function need static chain.
8030 Since function with tail call may use any caller-saved
8031 registers in epilogue, DRAP must not use caller-saved
8032 register in such case.  */
8033 if ((decl_function_context (decl)
8034 && !DECL_NO_STATIC_CHAIN (decl))
8035 || crtl->tail_call_emit)
8042 /* Use DI for nested function or function need static chain.
8043 Since function with tail call may use any caller-saved
8044 registers in epilogue, DRAP must not use caller-saved
8045 register in such case.  */
8046 if ((decl_function_context (decl)
8047 && !DECL_NO_STATIC_CHAIN (decl))
8048 || crtl->tail_call_emit)
/* Otherwise prefer the static-chain register, provided regparm does
   not claim it for arguments and the function is not fastcall.  */
8051 /* Reuse static chain register if it isn't used for parameter
8053 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8054 && !lookup_attribute ("fastcall",
8055 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8062 /* Update incoming stack boundary and estimated stack alignment.
     Computes ix86_incoming_stack_boundary from the command-line value
     (if given) or the target default, then lowers/raises it for the
     force_align_arg_pointer attribute, the parameter stack boundary,
     and the special case of main().  */
8065 ix86_update_stack_boundary (void)
8067 /* Prefer the one specified at command line.  */
8068 ix86_incoming_stack_boundary
8069 = (ix86_user_incoming_stack_boundary
8070 ? ix86_user_incoming_stack_boundary
8071 : ix86_default_incoming_stack_boundary);
8073 /* Incoming stack alignment can be changed on individual functions
8074 via force_align_arg_pointer attribute.  We use the smallest
8075 incoming stack boundary.  */
8076 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8077 && lookup_attribute (ix86_force_align_arg_pointer_string,
8078 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8079 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8081 /* The incoming stack frame has to be aligned at least at
8082 parm_stack_boundary.  */
8083 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8084 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8086 /* Stack at entrance of main is aligned by runtime.  We use the
8087 smallest incoming stack boundary.  */
8088 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8089 && DECL_NAME (current_function_decl)
8090 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8091 && DECL_FILE_SCOPE_P (current_function_decl))
8092 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8094 /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes: the register save area must be 16-byte aligned.  */
8098 && crtl->stack_alignment_estimated < 128)
8099 crtl->stack_alignment_estimated = 128;
8102 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8103 needed or an rtx for DRAP otherwise.  */
8106 ix86_get_drap_rtx (void)
/* -mforce-drap, or pushing outgoing args, forces a DRAP register.  */
8108 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8109 crtl->need_drap = true;
8111 if (stack_realign_drap)
8113 /* Assign DRAP to vDRAP and returns vDRAP.  */
8114 unsigned int regno = find_drap_reg ();
/* Record the chosen hard register as the DRAP ...  */
8119 arg_ptr = gen_rtx_REG (Pmode, regno);
8120 crtl->drap_reg = arg_ptr;
/* ... and copy it into a pseudo (vDRAP) for body references.  */
8123 drap_vreg = copy_to_reg (arg_ptr);
/* Insert the copy right after the function entry so the DRAP value is
   captured before anything else runs; mark it frame-related.  */
8127 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8128 RTX_FRAME_RELATED_P (insn) = 1;
8135 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Always returns the
     virtual incoming-arguments pointer; no function-specific argument
     pointer is substituted here.  */
8138 ix86_internal_arg_pointer (void)
8140 return virtual_incoming_args_rtx;
8143 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8144 This is called from dwarf2out.c to emit call frame instructions
8145 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */
8147 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
/* PATTERN is a SET whose source is the UNSPEC to decode.  */
8149 rtx unspec = SET_SRC (pattern);
8150 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* The register in unspec operand 0 was saved into SET_DEST.  */
8154 case UNSPEC_REG_SAVE:
8155 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8156 SET_DEST (pattern));
/* The CFA is now SET_DEST plus the constant in unspec operand 0.  */
8158 case UNSPEC_DEF_CFA:
8159 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8160 INTVAL (XVECEXP (unspec, 0, 0)));
8167 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8168 to be generated in correct form.  */
8170 ix86_finalize_stack_realign_flags (void)
8172 /* Check if stack realign is really needed after reload, and
8173 stores result in cfun.  */
/* The effective incoming boundary is the larger of the parameter stack
   boundary and the incoming stack boundary.  */
8174 unsigned int incoming_stack_boundary
8175 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8176 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realignment is needed when the required alignment (max used slot
   alignment for a leaf, stack_alignment_needed otherwise) exceeds what
   the caller guarantees.  */
8177 unsigned int stack_realign = (incoming_stack_boundary
8178 < (current_function_is_leaf
8179 ? crtl->max_used_stack_slot_alignment
8180 : crtl->stack_alignment_needed));
8182 if (crtl->stack_realign_finalized)
8184 /* After stack_realign_needed is finalized, we can no longer
8186 gcc_assert (crtl->stack_realign_needed == stack_realign);
8190 crtl->stack_realign_needed = stack_realign;
8191 crtl->stack_realign_finalized = true;
8195 /* Expand the prologue into a bunch of separate insns.  Handles DRAP
     setup, frame-pointer establishment, stack realignment, register
     saves (pushes or MOVs), stack allocation (with optional probing),
     PIC register setup, and CLD emission.  */
8198 ix86_expand_prologue (void)
8202 struct ix86_frame frame;
8203 HOST_WIDE_INT allocate;
8205 ix86_finalize_stack_realign_flags ();
8207 /* DRAP should not coexist with stack_realign_fp.  */
8208 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8210 ix86_compute_frame_layout (&frame);
8212 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8213 of DRAP is needed and stack realignment is really needed after reload.  */
8214 if (crtl->drap_reg && crtl->stack_realign_needed)
8217 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* If the DRAP register is call-saved it must itself be pushed, which
   shifts where the argument pointer lives relative to %esp.  */
8218 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8219 ? 0 : UNITS_PER_WORD);
8221 gcc_assert (stack_realign_drap);
8223 /* Grab the argument pointer.  */
8224 x = plus_constant (stack_pointer_rtx,
8225 (UNITS_PER_WORD + param_ptr_offset))
8228 /* Only need to push parameter pointer reg if it is caller
8230 if (!call_used_regs[REGNO (crtl->drap_reg)])
8232 /* Push arg pointer reg.  */
8233 insn = emit_insn (gen_push (y));
8234 RTX_FRAME_RELATED_P (insn) = 1;
8237 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8238 RTX_FRAME_RELATED_P (insn) = 1;
8240 /* Align the stack.  */
8241 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8243 GEN_INT (-align_bytes)));
8244 RTX_FRAME_RELATED_P (insn) = 1;
8246 /* Replicate the return address on the stack so that return
8247 address can be reached via (argp - 1) slot.  This is needed
8248 to implement macro RETURN_ADDR_RTX and intrinsic function
8249 expand_builtin_return_addr etc.  */
8251 x = gen_frame_mem (Pmode,
8252 plus_constant (x, -UNITS_PER_WORD));
8253 insn = emit_insn (gen_push (x));
8254 RTX_FRAME_RELATED_P (insn) = 1;
8257 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8258 slower on all targets.  Also sdb doesn't like it.  */
8260 if (frame_pointer_needed)
/* Classic frame setup: push %ebp; mov %esp, %ebp.  */
8262 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8263 RTX_FRAME_RELATED_P (insn) = 1;
8265 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8266 RTX_FRAME_RELATED_P (insn) = 1;
8269 if (stack_realign_fp)
8271 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8272 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8274 /* Align the stack.  */
8275 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8277 GEN_INT (-align_bytes)));
8278 RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate beyond the pushed general registers: locals plus
   the SSE save area (16 bytes each) plus padding.  */
8281 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8283 if (!frame.save_regs_using_mov)
8284 ix86_emit_save_regs ();
8286 allocate += frame.nregs * UNITS_PER_WORD;
8288 /* When using red zone we may start register saving before allocating
8289 the stack frame saving one cycle of the prologue.  However I will
8290 avoid doing this if I am going to have to probe the stack since
8291 at least on x86_64 the stack probe can turn into a call that clobbers
8292 a red zone location.  */
8293 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8294 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8295 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8296 && !crtl->stack_realign_needed)
8297 ? hard_frame_pointer_rtx
8298 : stack_pointer_rtx,
8299 -frame.nregs * UNITS_PER_WORD);
/* Small (or unprobed) allocation: a single stack-pointer adjustment.  */
8303 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8304 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8305 GEN_INT (-allocate), -1);
/* Large allocation that needs probing: go through the Windows-style
   allocate_stack_worker, which takes the size in %eax/%rax.  */
8308 /* Only valid for Win32.  */
8309 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8313 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8315 if (cfun->machine->call_abi == MS_ABI)
/* If %eax carries an incoming argument, preserve it around the call.  */
8318 eax_live = ix86_eax_live_at_start_p ();
8322 emit_insn (gen_push (eax));
8323 allocate -= UNITS_PER_WORD;
8326 emit_move_insn (eax, GEN_INT (allocate));
8329 insn = gen_allocate_stack_worker_64 (eax, eax);
8331 insn = gen_allocate_stack_worker_32 (eax, eax);
8332 insn = emit_insn (insn);
8333 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach a REG_FRAME_RELATED_EXPR note describing the net sp change,
   since the worker insn itself does not expose it.  */
8334 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8335 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8336 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8337 t, REG_NOTES (insn));
/* Restore the saved %eax from its stack slot.  */
8341 if (frame_pointer_needed)
8342 t = plus_constant (hard_frame_pointer_rtx,
8345 - frame.nregs * UNITS_PER_WORD);
8347 t = plus_constant (stack_pointer_rtx, allocate);
8348 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based register saves that were not done early (red zone case).  */
8352 if (frame.save_regs_using_mov
8353 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8354 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8356 if (!frame_pointer_needed
8357 || !frame.to_allocate
8358 || crtl->stack_realign_needed)
8359 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8361 + frame.nsseregs * 16 + frame.padding0);
8363 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8364 -frame.nregs * UNITS_PER_WORD);
8366 if (!frame_pointer_needed
8367 || !frame.to_allocate
8368 || crtl->stack_realign_needed)
8369 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8372 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8373 - frame.nregs * UNITS_PER_WORD
8374 - frame.nsseregs * 16
/* Set up the PIC register if this function uses it.  */
8377 pic_reg_used = false;
8378 if (pic_offset_table_rtx
8379 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
/* Try an alternate (call-clobbered) register for the PIC base.  */
8382 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8384 if (alt_pic_reg_used != INVALID_REGNUM)
8385 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8387 pic_reg_used = true;
8394 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC model: compute GOT = RIP-relative label address plus the
   64-bit GOT offset materialized in a scratch register.  */
8396 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8397 rtx label = gen_label_rtx ();
8399 LABEL_PRESERVE_P (label) = 1;
8400 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8401 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8402 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8403 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8404 pic_offset_table_rtx, tmp_reg));
8407 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8410 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8413 /* In the pic_reg_used case, make sure that the got load isn't deleted
8414 when mcount needs it.  Blockage to avoid call movement across mcount
8415 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END.  */
8417 if (crtl->profile && pic_reg_used)
8418 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8420 if (crtl->drap_reg && !crtl->stack_realign_needed)
8422 /* vDRAP is setup but after reload it turns out stack realign
8423 isn't necessary, here we will emit prologue to setup DRAP
8424 without stack realign adjustment.  */
8425 int drap_bp_offset = UNITS_PER_WORD * 2;
8426 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8427 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8430 /* Prevent instructions from being scheduled into register save push
8431 sequence when access to the redzone area is done through frame pointer.
8432 The offset between the frame pointer and the stack pointer is calculated
8433 relative to the value of the stack pointer at the end of the function
8434 prologue, and moving instructions that access redzone area via frame
8435 pointer inside push sequence violates this assumption.  */
8436 if (frame_pointer_needed && frame.red_zone_size)
8437 emit_insn (gen_memory_blockage ());
8439 /* Emit cld instruction if stringops are used in the function.  */
8440 if (TARGET_CLD && ix86_current_function_needs_cld)
8441 emit_insn (gen_cld ());
8444 /* Emit code to restore saved registers using MOV insns.  First register
8445 is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is forwarded to
     ix86_save_reg so the eh_return scratch registers are included when
     restoring along an exception path.  */
8447 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8448 int maybe_eh_return)
8451 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8453 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8454 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8456 /* Ensure that adjust_address won't be forced to produce pointer
8457 out of range allowed by x86-64 instruction set.  */
8458 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a signed 32-bit displacement: rebase the
   address in %r11 instead.  */
8462 r11 = gen_rtx_REG (DImode, R11_REG);
8463 emit_move_insn (r11, GEN_INT (offset));
8464 emit_insn (gen_adddi3 (r11, r11, pointer));
8465 base_address = gen_rtx_MEM (Pmode, r11);
8468 emit_move_insn (gen_rtx_REG (Pmode, regno),
8469 adjust_address (base_address, Pmode, offset));
8470 offset += UNITS_PER_WORD;
8474 /* Emit code to restore saved SSE registers using MOV insns.  First
8475 register is restored from POINTER + OFFSET as a TImode value from a
     128-bit-aligned slot; MAYBE_EH_RETURN is forwarded to ix86_save_reg.  */
8477 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8478 int maybe_eh_return)
8481 rtx base_address = gen_rtx_MEM (TImode, pointer);
8484 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8485 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8487 /* Ensure that adjust_address won't be forced to produce pointer
8488 out of range allowed by x86-64 instruction set.  */
8489 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a signed 32-bit displacement: rebase in %r11.  */
8493 r11 = gen_rtx_REG (DImode, R11_REG);
8494 emit_move_insn (r11, GEN_INT (offset));
8495 emit_insn (gen_adddi3 (r11, r11, pointer));
8496 base_address = gen_rtx_MEM (TImode, r11);
8499 mem = adjust_address (base_address, TImode, offset);
/* The save slots were written 128-bit aligned; mark the load the same
   so an aligned vector move is selected.  */
8500 set_mem_align (mem, 128);
8501 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8506 /* Restore function stack, frame, and registers.  STYLE selects the
     epilogue flavor (style 2 is the eh_return path; sibcall epilogues
     emit no return insn).  */
8509 ix86_expand_epilogue (int style)
8513 struct ix86_frame frame;
8514 HOST_WIDE_INT offset;
8516 ix86_finalize_stack_realign_flags ();
8518 /* When stack is realigned, SP must be valid.  */
8519 sp_valid = (!frame_pointer_needed
8520 || current_function_sp_is_unchanging
8521 || stack_realign_fp);
8523 ix86_compute_frame_layout (&frame);
8525 /* See the comment about red zone and frame
8526 pointer usage in ix86_expand_prologue.  */
8527 if (frame_pointer_needed && frame.red_zone_size)
8528 emit_insn (gen_memory_blockage ());
8530 /* Calculate start of saved registers relative to ebp.  Special care
8531 must be taken for the normal return case of a function using
8532 eh_return: the eax and edx registers are marked as saved, but not
8533 restored along this path.  */
8534 offset = frame.nregs;
8535 if (crtl->calls_eh_return && style != 2)
8537 offset *= -UNITS_PER_WORD;
8538 offset -= frame.nsseregs * 16 + frame.padding0;
8540 /* If we're only restoring one register and sp is not valid then
8541 using a move instruction to restore the register since it's
8542 less work than reloading sp and popping the register.
8544 The default code result in stack adjustment using add/lea instruction,
8545 while this code results in LEAVE instruction (or discrete equivalent),
8546 so it is profitable in some other cases as well.  Especially when there
8547 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8548 and there is exactly one register to pop.  This heuristic may need some
8549 tuning in future.  */
8550 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8551 || (TARGET_EPILOGUE_USING_MOVE
8552 && cfun->machine->use_fast_prologue_epilogue
8553 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8554 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8555 || (frame_pointer_needed && TARGET_USE_LEAVE
8556 && cfun->machine->use_fast_prologue_epilogue
8557 && (frame.nregs + frame.nsseregs) == 1)
8558 || crtl->calls_eh_return)
8560 /* Restore registers.  We can use ebp or esp to address the memory
8561 locations.  If both are available, default to ebp, since offsets
8562 are known to be small.  Only exception is esp pointing directly
8563 to the end of block of saved registers, where we may simplify
8566 If we are realigning stack with bp and sp, regs restore can't
8567 be addressed by bp.  sp must be used instead.  */
8569 if (!frame_pointer_needed
8570 || (sp_valid && !frame.to_allocate)
8571 || stack_realign_fp)
8573 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8574 frame.to_allocate, style == 2);
8575 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8577 + frame.nsseregs * 16
8578 + frame.padding0, style == 2);
8582 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8583 offset, style == 2);
8584 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8586 + frame.nsseregs * 16
8587 + frame.padding0, style == 2);
8590 /* eh_return epilogues need %ecx added to the stack pointer.  */
8593 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8595 /* Stack align doesn't work with eh_return.  */
8596 gcc_assert (!crtl->stack_realign_needed);
8598 if (frame_pointer_needed)
/* With a frame pointer: compute the adjusted sp from %ebp + sa,
   reload %ebp from its save slot, then adjust the stack.  */
8600 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8601 tmp = plus_constant (tmp, UNITS_PER_WORD);
8602 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8604 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8605 emit_move_insn (hard_frame_pointer_rtx, tmp);
8607 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: fold the whole frame size plus the
   eh stack adjustment into one sp update.  */
8612 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8613 tmp = plus_constant (tmp, (frame.to_allocate
8614 + frame.nregs * UNITS_PER_WORD
8615 + frame.nsseregs * 16
8617 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8620 else if (!frame_pointer_needed)
8621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8622 GEN_INT (frame.to_allocate
8623 + frame.nregs * UNITS_PER_WORD
8624 + frame.nsseregs * 16
8627 /* If not an i386, mov & pop is faster than "leave".  */
8628 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8629 || !cfun->machine->use_fast_prologue_epilogue)
8630 emit_insn ((*ix86_gen_leave) ());
/* Discrete equivalent of leave: mov %ebp -> %esp, pop %ebp.  */
8633 pro_epilogue_adjust_stack (stack_pointer_rtx,
8634 hard_frame_pointer_rtx,
8637 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based epilogue path.  */
8642 /* First step is to deallocate the stack frame so that we can
8645 If we realign stack with frame pointer, then stack pointer
8646 won't be able to recover via lea $offset(%bp), %sp, because
8647 there is a padding area between bp and sp for realign.
8648 "add $to_allocate, %sp" must be used instead.  */
8651 gcc_assert (frame_pointer_needed);
8652 gcc_assert (!stack_realign_fp);
8653 pro_epilogue_adjust_stack (stack_pointer_rtx,
8654 hard_frame_pointer_rtx,
8655 GEN_INT (offset), style);
8656 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8657 frame.to_allocate, style == 2);
8658 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8659 GEN_INT (frame.nsseregs * 16), style);
8661 else if (frame.to_allocate || frame.nsseregs)
8663 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8666 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8667 GEN_INT (frame.to_allocate
8668 + frame.nsseregs * 16
8669 + frame.padding0), style);
/* Pop the saved general registers in increasing regno order.  */
8672 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8673 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8674 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8675 if (frame_pointer_needed)
8677 /* Leave results in shorter dependency chains on CPUs that are
8678 able to grok it fast.  */
8679 if (TARGET_USE_LEAVE)
8680 emit_insn ((*ix86_gen_leave) ());
8683 /* For stack realigned really happens, recover stack
8684 pointer to hard frame pointer is a must, if not using
8686 if (stack_realign_fp)
8687 pro_epilogue_adjust_stack (stack_pointer_rtx,
8688 hard_frame_pointer_rtx,
8690 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: pop back to the argument pointer and
   restore the DRAP register if it was call-saved (and pushed).  */
8695 if (crtl->drap_reg && crtl->stack_realign_needed)
8697 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8698 ? 0 : UNITS_PER_WORD);
8699 gcc_assert (stack_realign_drap);
8700 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8702 GEN_INT (-(UNITS_PER_WORD
8703 + param_ptr_offset))));
8704 if (!call_used_regs[REGNO (crtl->drap_reg)])
8705 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8709 /* Sibcall epilogues don't want a return instruction.  */
8713 if (crtl->args.pops_args && crtl->args.size)
8715 rtx popc = GEN_INT (crtl->args.pops_args);
8717 /* i386 can only pop 64K bytes.  If asked to pop more, pop
8718 return address, do explicit add, and jump indirectly to the
8721 if (crtl->args.pops_args >= 65536)
8723 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8725 /* There is no "pascal" calling convention in any 64bit ABI.  */
8726 gcc_assert (!TARGET_64BIT);
8728 emit_insn (gen_popsi1 (ecx));
8729 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8730 emit_jump_insn (gen_return_indirect_internal (ecx));
8733 emit_jump_insn (gen_return_pop_internal (popc));
8736 emit_jump_insn (gen_return_internal ());
8739 /* Reset from the function's potential modifications.  Restores the
     PIC register's real regno (the prologue may have switched it to an
     alternate register) and, on Mach-O, pads with a nop when the
     function would otherwise end at a deleted label.  */
8742 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8743 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8745 if (pic_offset_table_rtx)
8746 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8748 /* Mach-O doesn't support labels at the end of objects, so if
8749 it looks like we might want one, insert a NOP.  */
8751 rtx insn = get_last_insn ();
8754 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8755 insn = PREV_INSN (insn);
8759 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8760 fputs ("\tnop\n", file);
8766 /* Extract the parts of an RTL expression that is a valid memory address
8767 for an instruction.  Return 0 if the structure of the address is
8768 grossly off.  Return -1 if the address contains ASHIFT, so it is not
8769 strictly valid, but still used for computing length of lea instruction.  */
8772 ix86_decompose_address (rtx addr, struct ix86_address *out)
8774 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8775 rtx base_reg, index_reg;
8776 HOST_WIDE_INT scale = 1;
8777 rtx scale_rtx = NULL_RTX;
8779 enum ix86_address_seg seg = SEG_DEFAULT;
/* A bare (possibly SUBREG of a) register is just a base.  */
8781 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the nested sum into an addend list, then classify
   each addend as base, index*scale, segment unspec, or displacement.  */
8783 else if (GET_CODE (addr) == PLUS)
8793 addends[n++] = XEXP (op, 1);
8796 while (GET_CODE (op) == PLUS);
8801 for (i = n; i >= 0; --i)
8804 switch (GET_CODE (op))
8809 index = XEXP (op, 0);
8810 scale_rtx = XEXP (op, 1);
/* Thread-pointer unspec selects the TLS segment register
   (%fs on 64-bit, %gs on 32-bit).  */
8814 if (XINT (op, 1) == UNSPEC_TP
8815 && TARGET_TLS_DIRECT_SEG_REFS
8816 && seg == SEG_DEFAULT)
8817 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8846 else if (GET_CODE (addr) == MULT)
8848 index = XEXP (addr, 0); /* index*scale */
8849 scale_rtx = XEXP (addr, 1);
8851 else if (GET_CODE (addr) == ASHIFT)
8855 /* We're called for lea too, which implements ashift on occasion.  */
8856 index = XEXP (addr, 0);
8857 tmp = XEXP (addr, 1);
8858 if (!CONST_INT_P (tmp))
/* Shift count becomes the scale; only 1/2/4/8 (shifts 0-3) encode.  */
8860 scale = INTVAL (tmp);
8861 if ((unsigned HOST_WIDE_INT) scale > 3)
8867 disp = addr; /* displacement */
8869 /* Extract the integral value of scale.  */
8872 if (!CONST_INT_P (scale_rtx))
8874 scale = INTVAL (scale_rtx);
/* Peel SUBREGs so the register checks below see the real hard regs.  */
8877 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8878 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8880 /* Allow arg pointer and stack pointer as index if there is not scaling.  */
8881 if (base_reg && index_reg && scale == 1
8882 && (index_reg == arg_pointer_rtx
8883 || index_reg == frame_pointer_rtx
8884 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp cannot be an index; swap so it becomes the base.  */
8887 tmp = base, base = index, index = tmp;
8888 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8891 /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8892 if ((base_reg == hard_frame_pointer_rtx
8893 || base_reg == frame_pointer_rtx
8894 || base_reg == arg_pointer_rtx) && !disp)
8897 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8898 Avoid this by transforming to [%esi+0].
8899 Reload calls address legitimization without cfun defined, so we need
8900 to test cfun for being non-NULL.  */
8901 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8902 && base_reg && !index_reg && !disp
8904 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8907 /* Special case: encode reg+reg instead of reg*2.  */
8908 if (!base && index && scale && scale == 2)
8909 base = index, base_reg = index_reg, scale = 1;
8911 /* Special case: scaling cannot be encoded without base or displacement.  */
8912 if (!base && !disp && index && scale != 1)
8924 /* Return cost of the memory address x.
8925 For i386, it is better to use a complex address than let gcc copy
8926 the address into a reg and make a new pseudo.  But not if the address
8927 requires two regs - that would mean more pseudos with longer
8930 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8932 struct ix86_address parts;
8934 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the hard/pseudo register tests below apply
   to the underlying registers.  */
8938 if (parts.base && GET_CODE (parts.base) == SUBREG)
8939 parts.base = SUBREG_REG (parts.base);
8940 if (parts.index && GET_CODE (parts.index) == SUBREG)
8941 parts.index = SUBREG_REG (parts.index);
8943 /* Attempt to minimize number of registers in the address.  */
8945 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8947 && (!REG_P (parts.index)
8948 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* Penalize addresses needing two distinct pseudo registers.  */
8952 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8954 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8955 && parts.base != parts.index)
8958 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8959 since its predecode logic can't detect the length of instructions
8960 and it degenerates to vector decoded.  Increase cost of such
8961 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8962 to split such addresses or even refuse such addresses at all.
8964 Following addressing modes are affected:
8969 The first and last case may be avoidable by explicitly coding the zero in
8970 memory address, but I don't have AMD-K6 machine handy to check this
8974 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8975 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8976 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8983 this is used to form addresses to local data when -fPIC is in
8987 darwin_local_data_pic (rtx disp)
/* True iff DISP is the Mach-O picbase-offset unspec.  */
8989 return (GET_CODE (disp) == UNSPEC
8990 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8993 /* Determine if a given RTX is a valid constant.  We already know this
8994 satisfies CONSTANT_P.  */
8997 legitimate_constant_p (rtx x)
8999 switch (GET_CODE (x))
/* CONST: peel (plus (sym) (const_int)) and unspec wrappers, then
   validate what remains.  */
9004 if (GET_CODE (x) == PLUS)
9006 if (!CONST_INT_P (XEXP (x, 1)))
9011 if (TARGET_MACHO && darwin_local_data_pic (x))
9014 /* Only some unspecs are valid as "constants".  */
9015 if (GET_CODE (x) == UNSPEC)
9016 switch (XINT (x, 1))
9021 return TARGET_64BIT;
/* TLS unspecs are constant only for the matching TLS model of the
   wrapped symbol.  */
9024 x = XVECEXP (x, 0, 0);
9025 return (GET_CODE (x) == SYMBOL_REF
9026 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9028 x = XVECEXP (x, 0, 0);
9029 return (GET_CODE (x) == SYMBOL_REF
9030 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9035 /* We must have drilled down to a symbol.  */
9036 if (GET_CODE (x) == LABEL_REF)
9038 if (GET_CODE (x) != SYMBOL_REF)
9043 /* TLS symbols are never valid.  */
9044 if (SYMBOL_REF_TLS_MODEL (x))
9047 /* DLLIMPORT symbols are never valid.  */
9048 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9049 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode constants other than zero are restricted (condition elided
   here -- see full source).  */
9054 if (GET_MODE (x) == TImode
9055 && x != CONST0_RTX (TImode)
/* Vector constants must be one of the standard SSE constants.  */
9061 if (!standard_sse_constant_p (x))
9068 /* Otherwise we handle everything else in the move patterns.  */
9072 /* Determine if it's legal to put X into the constant pool.  This
9073 is not possible for the address of thread-local symbols, which
9074 is checked above.  */
9077 ix86_cannot_force_const_mem (rtx x)
9079 /* We can always put integral constants and vectors in memory.  */
9080 switch (GET_CODE (x))
/* Anything else may go to memory only if it is a legitimate constant.  */
9090 return !legitimate_constant_p (x);
9093 /* Determine if a given RTX is a valid constant address: it must be
     CONSTANT_P and pass strict address legitimization.  */
9096 constant_address_p (rtx x)
9098 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9101 /* Nonzero if the constant value X is a legitimate general operand
9102 when generating PIC code.  It is given that flag_pic is on and
9103 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9106 legitimate_pic_operand_p (rtx x)
9110 switch (GET_CODE (x))
/* CONST: peel an optional (plus ... const_int) wrapper first.  */
9113 inner = XEXP (x, 0);
9114 if (GET_CODE (inner) == PLUS
9115 && CONST_INT_P (XEXP (inner, 1)))
9116 inner = XEXP (inner, 0);
9118 /* Only some unspecs are valid as "constants".  */
9119 if (GET_CODE (inner) == UNSPEC)
9120 switch (XINT (inner, 1))
9125 return TARGET_64BIT;
9127 x = XVECEXP (inner, 0, 0);
9128 return (GET_CODE (x) == SYMBOL_REF
9129 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9130 case UNSPEC_MACHOPIC_OFFSET:
9131 return legitimate_pic_address_disp_p (x);
/* Symbols and labels fall through to the displacement check.  */
9139 return legitimate_pic_address_disp_p (x);
9146 /* Determine if a given CONST RTX is a valid memory displacement
9150 legitimate_pic_address_disp_p (rtx disp)
9154 /* In 64bit mode we can allow direct addresses of symbols and labels
9155 when they are not dynamic symbols.  */
9158 rtx op0 = disp, op1;
9160 switch (GET_CODE (disp))
/* CONST: must be sym/label plus a small (+/-16MB) constant.  */
9166 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9168 op0 = XEXP (XEXP (disp, 0), 0);
9169 op1 = XEXP (XEXP (disp, 0), 1);
9170 if (!CONST_INT_P (op1)
9171 || INTVAL (op1) >= 16*1024*1024
9172 || INTVAL (op1) < -16*1024*1024)
9174 if (GET_CODE (op0) == LABEL_REF)
9176 if (GET_CODE (op0) != SYMBOL_REF)
9181 /* TLS references should always be enclosed in UNSPEC.  */
9182 if (SYMBOL_REF_TLS_MODEL (op0))
/* Direct references are OK only for local, near symbols outside the
   large PIC code model.  */
9184 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9185 && ix86_cmodel != CM_LARGE_PIC)
9193 if (GET_CODE (disp) != CONST)
9195 disp = XEXP (disp, 0);
9199 /* It is unsafe to allow PLUS expressions.  This limit allowed distance
9200 of GOT tables.  We should not need these anyway.  */
9201 if (GET_CODE (disp) != UNSPEC
9202 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9203 && XINT (disp, 1) != UNSPEC_GOTOFF
9204 && XINT (disp, 1) != UNSPEC_PLTOFF)
9207 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9208 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: peel an optional constant addend.  */
9214 if (GET_CODE (disp) == PLUS)
9216 if (!CONST_INT_P (XEXP (disp, 1)))
9218 disp = XEXP (disp, 0);
9222 if (TARGET_MACHO && darwin_local_data_pic (disp))
9225 if (GET_CODE (disp) != UNSPEC)
9228 switch (XINT (disp, 1))
9233 /* We need to check for both symbols and labels because VxWorks loads
9234 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9236 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9237 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9239 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9240 While ABI specify also 32bit relocation but we don't produce it in
9241 small PIC model at all.  */
9242 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9243 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9245 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS displacement unspecs: each is valid only when the wrapped
   symbol has the matching TLS model.  */
9247 case UNSPEC_GOTTPOFF:
9248 case UNSPEC_GOTNTPOFF:
9249 case UNSPEC_INDNTPOFF:
9252 disp = XVECEXP (disp, 0, 0);
9253 return (GET_CODE (disp) == SYMBOL_REF
9254 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9256 disp = XVECEXP (disp, 0, 0);
9257 return (GET_CODE (disp) == SYMBOL_REF
9258 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9260 disp = XVECEXP (disp, 0, 0);
9261 return (GET_CODE (disp) == SYMBOL_REF
9262 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9268 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9269 memory address for an instruction. The MODE argument is the machine mode
9270 for the MEM expression that wants to use this address.
9272 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9273 convert common non-canonical forms to canonical form so that they will
9277 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9278 rtx addr, int strict)
9280 struct ix86_address parts;
9281 rtx base, index, disp;
9282 HOST_WIDE_INT scale;
9283 const char *reason = NULL;
9284 rtx reason_rtx = NULL_RTX;
9286 if (ix86_decompose_address (addr, &parts) <= 0)
9288 reason = "decomposition failed";
9293 index = parts.index;
9295 scale = parts.scale;
9297 /* Validate base register.
9299 Don't allow SUBREG's that span more than a word here. It can lead to spill
9300 failures when the base is one word out of a two word structure, which is
9301 represented internally as a DImode int. */
9310 else if (GET_CODE (base) == SUBREG
9311 && REG_P (SUBREG_REG (base))
9312 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9314 reg = SUBREG_REG (base);
9317 reason = "base is not a register";
9321 if (GET_MODE (base) != Pmode)
9323 reason = "base is not in Pmode";
9327 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9328 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9330 reason = "base is not valid";
9335 /* Validate index register.
9337 Don't allow SUBREG's that span more than a word here -- same as above. */
9346 else if (GET_CODE (index) == SUBREG
9347 && REG_P (SUBREG_REG (index))
9348 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9350 reg = SUBREG_REG (index);
9353 reason = "index is not a register";
9357 if (GET_MODE (index) != Pmode)
9359 reason = "index is not in Pmode";
9363 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9364 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9366 reason = "index is not valid";
9371 /* Validate scale factor. */
9374 reason_rtx = GEN_INT (scale);
9377 reason = "scale without index";
9381 if (scale != 2 && scale != 4 && scale != 8)
9383 reason = "scale is not a valid multiplier";
9388 /* Validate displacement. */
9393 if (GET_CODE (disp) == CONST
9394 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9395 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9396 switch (XINT (XEXP (disp, 0), 1))
9398 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9399 used. While ABI specify also 32bit relocations, we don't produce
9400 them at all and use IP relative instead. */
9403 gcc_assert (flag_pic);
9405 goto is_legitimate_pic;
9406 reason = "64bit address unspec";
9409 case UNSPEC_GOTPCREL:
9410 gcc_assert (flag_pic);
9411 goto is_legitimate_pic;
9413 case UNSPEC_GOTTPOFF:
9414 case UNSPEC_GOTNTPOFF:
9415 case UNSPEC_INDNTPOFF:
9421 reason = "invalid address unspec";
9425 else if (SYMBOLIC_CONST (disp)
9429 && MACHOPIC_INDIRECT
9430 && !machopic_operand_p (disp)
9436 if (TARGET_64BIT && (index || base))
9438 /* foo@dtpoff(%rX) is ok. */
9439 if (GET_CODE (disp) != CONST
9440 || GET_CODE (XEXP (disp, 0)) != PLUS
9441 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9442 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9443 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9444 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9446 reason = "non-constant pic memory reference";
9450 else if (! legitimate_pic_address_disp_p (disp))
9452 reason = "displacement is an invalid pic construct";
9456 /* This code used to verify that a symbolic pic displacement
9457 includes the pic_offset_table_rtx register.
9459 While this is good idea, unfortunately these constructs may
9460 be created by "adds using lea" optimization for incorrect
9469 This code is nonsensical, but results in addressing
9470 GOT table with pic_offset_table_rtx base. We can't
9471 just refuse it easily, since it gets matched by
9472 "addsi3" pattern, that later gets split to lea in the
9473 case output register differs from input. While this
9474 can be handled by separate addsi pattern for this case
9475 that never results in lea, this seems to be easier and
9476 correct fix for crash to disable this test. */
9478 else if (GET_CODE (disp) != LABEL_REF
9479 && !CONST_INT_P (disp)
9480 && (GET_CODE (disp) != CONST
9481 || !legitimate_constant_p (disp))
9482 && (GET_CODE (disp) != SYMBOL_REF
9483 || !legitimate_constant_p (disp)))
9485 reason = "displacement is not constant";
9488 else if (TARGET_64BIT
9489 && !x86_64_immediate_operand (disp, VOIDmode))
9491 reason = "displacement is out of range";
9496 /* Everything looks valid. */
9503 /* Return a unique alias set for the GOT. */
9505 static alias_set_type
9506 ix86_GOT_alias_set (void)
9508 static alias_set_type set = -1;
9510 set = new_alias_set ();
9514 /* Return a legitimate reference for ORIG (an address) using the
9515 register REG. If REG is 0, a new pseudo is generated.
9517 There are two types of references that must be handled:
9519 1. Global data references must load the address from the GOT, via
9520 the PIC reg. An insn is emitted to do this load, and the reg is
9523 2. Static data references, constant pool addresses, and code labels
9524 compute the address as an offset from the GOT, whose base is in
9525 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9526 differentiate them from global data objects. The returned
9527 address is the PIC reg + an unspec constant.
9529 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9530 reg also appears in the address. */
9533 legitimize_pic_address (rtx orig, rtx reg)
9540 if (TARGET_MACHO && !TARGET_64BIT)
9543 reg = gen_reg_rtx (Pmode);
9544 /* Use the generic Mach-O PIC machinery. */
9545 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9549 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9551 else if (TARGET_64BIT
9552 && ix86_cmodel != CM_SMALL_PIC
9553 && gotoff_operand (addr, Pmode))
9556 /* This symbol may be referenced via a displacement from the PIC
9557 base address (@GOTOFF). */
9559 if (reload_in_progress)
9560 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9561 if (GET_CODE (addr) == CONST)
9562 addr = XEXP (addr, 0);
9563 if (GET_CODE (addr) == PLUS)
9565 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9567 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9570 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9571 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9573 tmpreg = gen_reg_rtx (Pmode);
9576 emit_move_insn (tmpreg, new_rtx);
9580 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9581 tmpreg, 1, OPTAB_DIRECT);
9584 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9586 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9588 /* This symbol may be referenced via a displacement from the PIC
9589 base address (@GOTOFF). */
9591 if (reload_in_progress)
9592 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9593 if (GET_CODE (addr) == CONST)
9594 addr = XEXP (addr, 0);
9595 if (GET_CODE (addr) == PLUS)
9597 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9599 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9602 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9603 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9604 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9608 emit_move_insn (reg, new_rtx);
9612 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9613 /* We can't use @GOTOFF for text labels on VxWorks;
9614 see gotoff_operand. */
9615 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9617 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9619 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9620 return legitimize_dllimport_symbol (addr, true);
9621 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9622 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9623 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9625 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9626 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9630 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9632 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9633 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9634 new_rtx = gen_const_mem (Pmode, new_rtx);
9635 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9638 reg = gen_reg_rtx (Pmode);
9639 /* Use directly gen_movsi, otherwise the address is loaded
9640 into register for CSE. We don't want to CSE this addresses,
9641 instead we CSE addresses from the GOT table, so skip this. */
9642 emit_insn (gen_movsi (reg, new_rtx));
9647 /* This symbol must be referenced via a load from the
9648 Global Offset Table (@GOT). */
9650 if (reload_in_progress)
9651 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9655 new_rtx = force_reg (Pmode, new_rtx);
9656 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9657 new_rtx = gen_const_mem (Pmode, new_rtx);
9658 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9661 reg = gen_reg_rtx (Pmode);
9662 emit_move_insn (reg, new_rtx);
9668 if (CONST_INT_P (addr)
9669 && !x86_64_immediate_operand (addr, VOIDmode))
9673 emit_move_insn (reg, addr);
9677 new_rtx = force_reg (Pmode, addr);
9679 else if (GET_CODE (addr) == CONST)
9681 addr = XEXP (addr, 0);
9683 /* We must match stuff we generate before. Assume the only
9684 unspecs that can get here are ours. Not that we could do
9685 anything with them anyway.... */
9686 if (GET_CODE (addr) == UNSPEC
9687 || (GET_CODE (addr) == PLUS
9688 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9690 gcc_assert (GET_CODE (addr) == PLUS);
9692 if (GET_CODE (addr) == PLUS)
9694 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9696 /* Check first to see if this is a constant offset from a @GOTOFF
9697 symbol reference. */
9698 if (gotoff_operand (op0, Pmode)
9699 && CONST_INT_P (op1))
9703 if (reload_in_progress)
9704 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9705 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9707 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9708 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9709 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9713 emit_move_insn (reg, new_rtx);
9719 if (INTVAL (op1) < -16*1024*1024
9720 || INTVAL (op1) >= 16*1024*1024)
9722 if (!x86_64_immediate_operand (op1, Pmode))
9723 op1 = force_reg (Pmode, op1);
9724 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9730 base = legitimize_pic_address (XEXP (addr, 0), reg);
9731 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9732 base == reg ? NULL_RTX : reg);
9734 if (CONST_INT_P (new_rtx))
9735 new_rtx = plus_constant (base, INTVAL (new_rtx));
9738 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9740 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9741 new_rtx = XEXP (new_rtx, 1);
9743 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9751 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9754 get_thread_pointer (int to_reg)
9758 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9762 reg = gen_reg_rtx (Pmode);
9763 insn = gen_rtx_SET (VOIDmode, reg, tp);
9764 insn = emit_insn (insn);
9769 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9770 false if we expect this to be used for a memory address and true if
9771 we expect to load the address into a register. */
9774 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9776 rtx dest, base, off, pic, tp;
9781 case TLS_MODEL_GLOBAL_DYNAMIC:
9782 dest = gen_reg_rtx (Pmode);
9783 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9785 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9787 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9790 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9791 insns = get_insns ();
9794 RTL_CONST_CALL_P (insns) = 1;
9795 emit_libcall_block (insns, dest, rax, x);
9797 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9798 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9800 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9802 if (TARGET_GNU2_TLS)
9804 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9806 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9810 case TLS_MODEL_LOCAL_DYNAMIC:
9811 base = gen_reg_rtx (Pmode);
9812 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9814 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9816 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9819 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9820 insns = get_insns ();
9823 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9824 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9825 RTL_CONST_CALL_P (insns) = 1;
9826 emit_libcall_block (insns, base, rax, note);
9828 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9829 emit_insn (gen_tls_local_dynamic_base_64 (base));
9831 emit_insn (gen_tls_local_dynamic_base_32 (base));
9833 if (TARGET_GNU2_TLS)
9835 rtx x = ix86_tls_module_base ();
9837 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9838 gen_rtx_MINUS (Pmode, x, tp));
9841 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9842 off = gen_rtx_CONST (Pmode, off);
9844 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9846 if (TARGET_GNU2_TLS)
9848 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9850 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9855 case TLS_MODEL_INITIAL_EXEC:
9859 type = UNSPEC_GOTNTPOFF;
9863 if (reload_in_progress)
9864 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9865 pic = pic_offset_table_rtx;
9866 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9868 else if (!TARGET_ANY_GNU_TLS)
9870 pic = gen_reg_rtx (Pmode);
9871 emit_insn (gen_set_got (pic));
9872 type = UNSPEC_GOTTPOFF;
9877 type = UNSPEC_INDNTPOFF;
9880 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9881 off = gen_rtx_CONST (Pmode, off);
9883 off = gen_rtx_PLUS (Pmode, pic, off);
9884 off = gen_const_mem (Pmode, off);
9885 set_mem_alias_set (off, ix86_GOT_alias_set ());
9887 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9889 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9890 off = force_reg (Pmode, off);
9891 return gen_rtx_PLUS (Pmode, base, off);
9895 base = get_thread_pointer (true);
9896 dest = gen_reg_rtx (Pmode);
9897 emit_insn (gen_subsi3 (dest, base, off));
9901 case TLS_MODEL_LOCAL_EXEC:
9902 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9903 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9904 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9905 off = gen_rtx_CONST (Pmode, off);
9907 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9909 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9910 return gen_rtx_PLUS (Pmode, base, off);
9914 base = get_thread_pointer (true);
9915 dest = gen_reg_rtx (Pmode);
9916 emit_insn (gen_subsi3 (dest, base, off));
9927 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9930 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9931 htab_t dllimport_map;
9934 get_dllimport_decl (tree decl)
9936 struct tree_map *h, in;
9940 size_t namelen, prefixlen;
9946 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9948 in.hash = htab_hash_pointer (decl);
9949 in.base.from = decl;
9950 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9951 h = (struct tree_map *) *loc;
9955 *loc = h = GGC_NEW (struct tree_map);
9957 h->base.from = decl;
9958 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9959 DECL_ARTIFICIAL (to) = 1;
9960 DECL_IGNORED_P (to) = 1;
9961 DECL_EXTERNAL (to) = 1;
9962 TREE_READONLY (to) = 1;
9964 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9965 name = targetm.strip_name_encoding (name);
9966 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9967 ? "*__imp_" : "*__imp__";
9968 namelen = strlen (name);
9969 prefixlen = strlen (prefix);
9970 imp_name = (char *) alloca (namelen + prefixlen + 1);
9971 memcpy (imp_name, prefix, prefixlen);
9972 memcpy (imp_name + prefixlen, name, namelen + 1);
9974 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9975 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9976 SET_SYMBOL_REF_DECL (rtl, to);
9977 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9979 rtl = gen_const_mem (Pmode, rtl);
9980 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9982 SET_DECL_RTL (to, rtl);
9983 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9988 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9989 true if we require the result be a register. */
9992 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9997 gcc_assert (SYMBOL_REF_DECL (symbol));
9998 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10000 x = DECL_RTL (imp_decl);
10002 x = force_reg (Pmode, x);
10006 /* Try machine-dependent ways of modifying an illegitimate address
10007 to be legitimate. If we find one, return the new, valid address.
10008 This macro is used in only one place: `memory_address' in explow.c.
10010 OLDX is the address as it was before break_out_memory_refs was called.
10011 In some cases it is useful to look at this to decide what needs to be done.
10013 MODE and WIN are passed so that this macro can use
10014 GO_IF_LEGITIMATE_ADDRESS.
10016 It is always safe for this macro to do nothing. It exists to recognize
10017 opportunities to optimize the output.
10019 For the 80386, we handle X+REG by loading X into a register R and
10020 using R+REG. R will go in a general reg and indexing will be used.
10021 However, if REG is a broken-out memory address or multiplication,
10022 nothing needs to be done because REG can certainly go in a general reg.
10024 When -fpic is used, special handling is needed for symbolic references.
10025 See comments by legitimize_pic_address in i386.c for details. */
10028 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10033 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10035 return legitimize_tls_address (x, (enum tls_model) log, false);
10036 if (GET_CODE (x) == CONST
10037 && GET_CODE (XEXP (x, 0)) == PLUS
10038 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10039 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10041 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10042 (enum tls_model) log, false);
10043 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10046 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10048 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10049 return legitimize_dllimport_symbol (x, true);
10050 if (GET_CODE (x) == CONST
10051 && GET_CODE (XEXP (x, 0)) == PLUS
10052 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10053 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10055 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10056 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10060 if (flag_pic && SYMBOLIC_CONST (x))
10061 return legitimize_pic_address (x, 0);
10063 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10064 if (GET_CODE (x) == ASHIFT
10065 && CONST_INT_P (XEXP (x, 1))
10066 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10069 log = INTVAL (XEXP (x, 1));
10070 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10071 GEN_INT (1 << log));
10074 if (GET_CODE (x) == PLUS)
10076 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10078 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10079 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10080 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10083 log = INTVAL (XEXP (XEXP (x, 0), 1));
10084 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10085 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10086 GEN_INT (1 << log));
10089 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10090 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10091 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10094 log = INTVAL (XEXP (XEXP (x, 1), 1));
10095 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10096 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10097 GEN_INT (1 << log));
10100 /* Put multiply first if it isn't already. */
10101 if (GET_CODE (XEXP (x, 1)) == MULT)
10103 rtx tmp = XEXP (x, 0);
10104 XEXP (x, 0) = XEXP (x, 1);
10109 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10110 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10111 created by virtual register instantiation, register elimination, and
10112 similar optimizations. */
10113 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10116 x = gen_rtx_PLUS (Pmode,
10117 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10118 XEXP (XEXP (x, 1), 0)),
10119 XEXP (XEXP (x, 1), 1));
10123 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10124 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10125 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10126 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10127 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10128 && CONSTANT_P (XEXP (x, 1)))
10131 rtx other = NULL_RTX;
10133 if (CONST_INT_P (XEXP (x, 1)))
10135 constant = XEXP (x, 1);
10136 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10138 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10140 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10141 other = XEXP (x, 1);
10149 x = gen_rtx_PLUS (Pmode,
10150 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10151 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10152 plus_constant (other, INTVAL (constant)));
10156 if (changed && legitimate_address_p (mode, x, FALSE))
10159 if (GET_CODE (XEXP (x, 0)) == MULT)
10162 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10165 if (GET_CODE (XEXP (x, 1)) == MULT)
10168 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10172 && REG_P (XEXP (x, 1))
10173 && REG_P (XEXP (x, 0)))
10176 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10179 x = legitimize_pic_address (x, 0);
10182 if (changed && legitimate_address_p (mode, x, FALSE))
10185 if (REG_P (XEXP (x, 0)))
10187 rtx temp = gen_reg_rtx (Pmode);
10188 rtx val = force_operand (XEXP (x, 1), temp);
10190 emit_move_insn (temp, val);
10192 XEXP (x, 1) = temp;
10196 else if (REG_P (XEXP (x, 1)))
10198 rtx temp = gen_reg_rtx (Pmode);
10199 rtx val = force_operand (XEXP (x, 0), temp);
10201 emit_move_insn (temp, val);
10203 XEXP (x, 0) = temp;
10211 /* Print an integer constant expression in assembler syntax. Addition
10212 and subtraction are the only arithmetic that may appear in these
10213 expressions. FILE is the stdio stream to write to, X is the rtx, and
10214 CODE is the operand print code from the output string. */
10217 output_pic_addr_const (FILE *file, rtx x, int code)
10221 switch (GET_CODE (x))
10224 gcc_assert (flag_pic);
10229 if (! TARGET_MACHO || TARGET_64BIT)
10230 output_addr_const (file, x);
10233 const char *name = XSTR (x, 0);
10235 /* Mark the decl as referenced so that cgraph will
10236 output the function. */
10237 if (SYMBOL_REF_DECL (x))
10238 mark_decl_referenced (SYMBOL_REF_DECL (x));
10241 if (MACHOPIC_INDIRECT
10242 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10243 name = machopic_indirection_name (x, /*stub_p=*/true);
10245 assemble_name (file, name);
10247 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10248 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10249 fputs ("@PLT", file);
10256 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10257 assemble_name (asm_out_file, buf);
10261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10265 /* This used to output parentheses around the expression,
10266 but that does not work on the 386 (either ATT or BSD assembler). */
10267 output_pic_addr_const (file, XEXP (x, 0), code);
10271 if (GET_MODE (x) == VOIDmode)
10273 /* We can use %d if the number is <32 bits and positive. */
10274 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10275 fprintf (file, "0x%lx%08lx",
10276 (unsigned long) CONST_DOUBLE_HIGH (x),
10277 (unsigned long) CONST_DOUBLE_LOW (x));
10279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10282 /* We can't handle floating point constants;
10283 PRINT_OPERAND must handle them. */
10284 output_operand_lossage ("floating constant misused");
10288 /* Some assemblers need integer constants to appear first. */
10289 if (CONST_INT_P (XEXP (x, 0)))
10291 output_pic_addr_const (file, XEXP (x, 0), code);
10293 output_pic_addr_const (file, XEXP (x, 1), code);
10297 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10298 output_pic_addr_const (file, XEXP (x, 1), code);
10300 output_pic_addr_const (file, XEXP (x, 0), code);
10306 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10307 output_pic_addr_const (file, XEXP (x, 0), code);
10309 output_pic_addr_const (file, XEXP (x, 1), code);
10311 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10315 gcc_assert (XVECLEN (x, 0) == 1);
10316 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10317 switch (XINT (x, 1))
10320 fputs ("@GOT", file);
10322 case UNSPEC_GOTOFF:
10323 fputs ("@GOTOFF", file);
10325 case UNSPEC_PLTOFF:
10326 fputs ("@PLTOFF", file);
10328 case UNSPEC_GOTPCREL:
10329 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10330 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10332 case UNSPEC_GOTTPOFF:
10333 /* FIXME: This might be @TPOFF in Sun ld too. */
10334 fputs ("@GOTTPOFF", file);
10337 fputs ("@TPOFF", file);
10339 case UNSPEC_NTPOFF:
10341 fputs ("@TPOFF", file);
10343 fputs ("@NTPOFF", file);
10345 case UNSPEC_DTPOFF:
10346 fputs ("@DTPOFF", file);
10348 case UNSPEC_GOTNTPOFF:
10350 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10351 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10353 fputs ("@GOTNTPOFF", file);
10355 case UNSPEC_INDNTPOFF:
10356 fputs ("@INDNTPOFF", file);
10359 case UNSPEC_MACHOPIC_OFFSET:
10361 machopic_output_function_base_name (file);
10365 output_operand_lossage ("invalid UNSPEC as operand");
10371 output_operand_lossage ("invalid expression as operand");
10375 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10376 We need to emit DTP-relative relocations. */
10378 static void ATTRIBUTE_UNUSED
10379 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10381 fputs (ASM_LONG, file);
10382 output_addr_const (file, x);
10383 fputs ("@DTPOFF", file);
10389 fputs (", 0", file);
10392 gcc_unreachable ();
10396 /* Return true if X is a representation of the PIC register. This copes
10397 with calls from ix86_find_base_term, where the register might have
10398 been replaced by a cselib value. */
10401 ix86_pic_register_p (rtx x)
10403 if (GET_CODE (x) == VALUE)
10404 return (pic_offset_table_rtx
10405 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10407 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10410 /* In the name of slightly smaller debug output, and to cater to
10411 general assembler lossage, recognize PIC+GOTOFF and turn it back
10412 into a direct symbol reference.
10414 On Darwin, this is necessary to avoid a crash, because Darwin
10415 has a different PIC label for each routine but the DWARF debugging
10416 information is not associated with any particular routine, so it's
10417 necessary to remove references to the PIC label from RTL stored by
10418 the DWARF output code. */
10421 ix86_delegitimize_address (rtx orig_x)
10424 /* reg_addend is NULL or a multiple of some register. */
10425 rtx reg_addend = NULL_RTX;
10426 /* const_addend is NULL or a const_int. */
10427 rtx const_addend = NULL_RTX;
10428 /* This is the result, or NULL. */
10429 rtx result = NULL_RTX;
10436 if (GET_CODE (x) != CONST
10437 || GET_CODE (XEXP (x, 0)) != UNSPEC
10438 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10439 || !MEM_P (orig_x))
10441 return XVECEXP (XEXP (x, 0), 0, 0);
10444 if (GET_CODE (x) != PLUS
10445 || GET_CODE (XEXP (x, 1)) != CONST)
10448 if (ix86_pic_register_p (XEXP (x, 0)))
10449 /* %ebx + GOT/GOTOFF */
10451 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10453 /* %ebx + %reg * scale + GOT/GOTOFF */
10454 reg_addend = XEXP (x, 0);
10455 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10456 reg_addend = XEXP (reg_addend, 1);
10457 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10458 reg_addend = XEXP (reg_addend, 0);
10461 if (!REG_P (reg_addend)
10462 && GET_CODE (reg_addend) != MULT
10463 && GET_CODE (reg_addend) != ASHIFT)
10469 x = XEXP (XEXP (x, 1), 0);
10470 if (GET_CODE (x) == PLUS
10471 && CONST_INT_P (XEXP (x, 1)))
10473 const_addend = XEXP (x, 1);
10477 if (GET_CODE (x) == UNSPEC
10478 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10479 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10480 result = XVECEXP (x, 0, 0);
10482 if (TARGET_MACHO && darwin_local_data_pic (x)
10483 && !MEM_P (orig_x))
10484 result = XVECEXP (x, 0, 0);
10490 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10492 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10496 /* If X is a machine specific address (i.e. a symbol or label being
10497 referenced as a displacement from the GOT implemented using an
10498 UNSPEC), then return the base term. Otherwise return X. */
10501 ix86_find_base_term (rtx x)
10507 if (GET_CODE (x) != CONST)
10509 term = XEXP (x, 0);
10510 if (GET_CODE (term) == PLUS
10511 && (CONST_INT_P (XEXP (term, 1))
10512 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10513 term = XEXP (term, 0);
10514 if (GET_CODE (term) != UNSPEC
10515 || XINT (term, 1) != UNSPEC_GOTPCREL)
10518 return XVECEXP (term, 0, 0);
10521 return ix86_delegitimize_address (x);
10525 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10526 int fp, FILE *file)
10528 const char *suffix;
10530 if (mode == CCFPmode || mode == CCFPUmode)
10532 enum rtx_code second_code, bypass_code;
10533 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10534 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10535 code = ix86_fp_compare_code_to_integer (code);
10539 code = reverse_condition (code);
10590 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10594 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10595 Those same assemblers have the same but opposite lossage on cmov. */
10596 if (mode == CCmode)
10597 suffix = fp ? "nbe" : "a";
10598 else if (mode == CCCmode)
10601 gcc_unreachable ();
10617 gcc_unreachable ();
10621 gcc_assert (mode == CCmode || mode == CCCmode);
10638 gcc_unreachable ();
10642 /* ??? As above. */
10643 gcc_assert (mode == CCmode || mode == CCCmode);
10644 suffix = fp ? "nb" : "ae";
10647 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10651 /* ??? As above. */
10652 if (mode == CCmode)
10654 else if (mode == CCCmode)
10655 suffix = fp ? "nb" : "ae";
10657 gcc_unreachable ();
10660 suffix = fp ? "u" : "p";
10663 suffix = fp ? "nu" : "np";
10666 gcc_unreachable ();
10668 fputs (suffix, file);
10671 /* Print the name of register X to FILE based on its machine mode and number.
10672 If CODE is 'w', pretend the mode is HImode.
10673 If CODE is 'b', pretend the mode is QImode.
10674 If CODE is 'k', pretend the mode is SImode.
10675 If CODE is 'q', pretend the mode is DImode.
10676 If CODE is 'x', pretend the mode is V4SFmode.
10677 If CODE is 't', pretend the mode is V8SFmode.
10678 If CODE is 'h', pretend the reg is the 'high' byte register.
10679 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10680 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print the assembler name of register X to FILE, sized according to the
   operand-modifier CODE (see the block comment above).  NOTE(review):
   elided listing -- the if/else ladder below originally assigned a size
   to CODE in each arm; those assignments are among the missing lines.  */
10684 print_reg (rtx x, int code, FILE *file)
/* 'd' asks for a duplicated operand, which only AVX encodings use.  */
10687 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers must never reach the assembler output.  */
10689 gcc_assert (x == pc_rtx
10690 || (REGNO (x) != ARG_POINTER_REGNUM
10691 && REGNO (x) != FRAME_POINTER_REGNUM
10692 && REGNO (x) != FLAGS_REG
10693 && REGNO (x) != FPSR_REG
10694 && REGNO (x) != FPCR_REG));
10696 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip", which only exists in 64-bit mode.  */
10701 gcc_assert (TARGET_64BIT);
10702 fputs ("rip", file);
10706 if (code == 'w' || MMX_REG_P (x))
10708 else if (code == 'b')
10710 else if (code == 'k')
10712 else if (code == 'q')
10714 else if (code == 'y')
10716 else if (code == 'h')
10718 else if (code == 'x')
10720 else if (code == 't')
/* No explicit size override: derive the size from the operand's mode.  */
10723 code = GET_MODE_SIZE (GET_MODE (x));
10725 /* Irritatingly, AMD extended registers use different naming convention
10726 from the normal registers. */
10727 if (REX_INT_REG_P (x))
10729 gcc_assert (TARGET_64BIT);
/* r8..r15 have no ah/bh-style high-byte halves.  */
10733 error ("extended registers have no high halves");
10736 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10739 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10742 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10745 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10748 error ("unsupported operand size for extended register");
10758 if (STACK_TOP_P (x))
/* Legacy integer registers: prefix 'r' (64-bit) or 'e' (32-bit)
   before the two-letter base name; FP/SSE names are used as-is.  */
10767 if (! ANY_FP_REG_P (x))
10768 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10773 reg = hi_reg_name[REGNO (x)];
10776 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10778 reg = qi_reg_name[REGNO (x)];
10781 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10783 reg = qi_high_reg_name[REGNO (x)];
10788 gcc_assert (!duplicated);
/* Skip the leading size letter of the hi_reg_name entry.  */
10790 fputs (hi_reg_name[REGNO (x)] + 1, file);
10795 gcc_unreachable ();
/* Duplicated (AVX) operand: emit the register a second time, with the
   '%' prefix only in AT&T syntax.  */
10801 if (ASSEMBLER_DIALECT == ASM_ATT)
10802 fprintf (file, ", %%%s", reg);
10804 fprintf (file, ", %s", reg);
10808 /* Locate some local-dynamic symbol still in use by this function
10809 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: when *PX is a local-dynamic TLS SYMBOL_REF,
   cache its name in cfun->machine->some_ld_name (and presumably return
   non-zero to stop the walk -- the return statements are elided).  */
10813 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10817 if (GET_CODE (x) == SYMBOL_REF
10818 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10820 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic TLS symbol name for the current
   function, scanning the insn stream on first use.  Aborts if the
   function contains no such symbol.  */
10827 static const char *
10828 get_some_local_dynamic_name (void)
/* Fast path: a previous call already found and cached a name.  */
10832 if (cfun->machine->some_ld_name)
10833 return cfun->machine->some_ld_name;
10835 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10837 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10838 return cfun->machine->some_ld_name;
/* The '%&' operand code must only be used when a local-dynamic symbol
   is known to exist.  */
10840 gcc_unreachable ();
10843 /* Meaning of CODE:
10844 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10845 C -- print opcode suffix for set/cmov insn.
10846 c -- like C, but print reversed condition
10847 E,e -- likewise, but for compare-and-branch fused insn.
10848 F,f -- likewise, but for floating-point.
10849 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10851 R -- print the prefix for register names.
10852 z -- print the opcode suffix for the size of the current operand.
10853 * -- print a star (in certain assembler syntax)
10854 A -- print an absolute memory reference.
10855 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10856 s -- print a shift double count, followed by the assembler's argument
10858 b -- print the QImode name of the register for the indicated operand.
10859 %b0 would print %al if operands[0] is reg 0.
10860 w -- likewise, print the HImode name of the register.
10861 k -- likewise, print the SImode name of the register.
10862 q -- likewise, print the DImode name of the register.
10863 x -- likewise, print the V4SFmode name of the register.
10864 t -- likewise, print the V8SFmode name of the register.
10865 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10866 y -- print "st(0)" instead of "st" as a register.
10867 d -- print duplicated register operand for AVX instruction.
10868 D -- print condition for SSE cmp instruction.
10869 P -- if PIC, print an @PLT suffix.
10870 X -- don't print any sort of PIC '@' suffix for a symbol.
10871 & -- print some in-use local-dynamic symbol name.
10872 H -- print a memory address offset by 8; used for sse high-parts
10873 Y -- print condition for SSE5 com* instruction.
10874 + -- print a branch hint as 'cs' or 'ds' prefix
10875 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Master operand-printing routine: emit operand X to FILE, formatted
   according to the modifier CODE documented in the block comment above.
   NOTE(review): elided listing -- the case labels of the big switch and
   many bodies are missing; comments below annotate only visible lines.  */
10879 print_operand (FILE *file, rtx x, int code)
/* '*' -- star prefix exists only in AT&T syntax.  */
10886 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&' -- print an in-use local-dynamic TLS symbol name.  */
10891 assemble_name (file, get_some_local_dynamic_name ());
/* 'A' -- absolute memory reference; spelling is dialect-dependent.  */
10895 switch (ASSEMBLER_DIALECT)
10902 /* Intel syntax. For absolute addresses, registers should not
10903 be surrounded by braces. */
10907 PRINT_OPERAND (file, x, 0);
10914 gcc_unreachable ();
10917 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes ('L','W','B','Q','S','T'): suffixes are an
   AT&T-syntax concept only.  */
10922 if (ASSEMBLER_DIALECT == ASM_ATT)
10927 if (ASSEMBLER_DIALECT == ASM_ATT)
10932 if (ASSEMBLER_DIALECT == ASM_ATT)
10937 if (ASSEMBLER_DIALECT == ASM_ATT)
10942 if (ASSEMBLER_DIALECT == ASM_ATT)
10947 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' -- opcode suffix derived from the operand's own mode.  */
10952 /* 387 opcodes don't get size suffixes if the operands are
10954 if (STACK_REG_P (x))
10957 /* Likewise if using Intel opcodes. */
10958 if (ASSEMBLER_DIALECT == ASM_INTEL)
10961 /* This is the size of op from size of operand. */
10962 switch (GET_MODE_SIZE (GET_MODE (x)))
10971 #ifdef HAVE_GAS_FILDS_FISTS
10981 if (GET_MODE (x) == SFmode)
10996 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11000 #ifdef GAS_MNEMONICS
11015 gcc_unreachable ();
/* 's' -- shift-double count; the count operand may be omitted when the
   assembler defaults it (SHIFT_DOUBLE_OMITS_COUNT).  */
11032 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11034 PRINT_OPERAND (file, x, 0);
11035 fputs (", ", file);
11040 /* Little bit of braindamage here. The SSE compare instructions
11041 does use completely different names for the comparisons that the
11042 fp conditional moves. */
/* 'D' (AVX flavor) -- print the SSE comparison name for condition X.  */
11045 switch (GET_CODE (x))
11048 fputs ("eq", file);
11051 fputs ("eq_us", file);
11054 fputs ("lt", file);
11057 fputs ("nge", file);
11060 fputs ("le", file);
11063 fputs ("ngt", file);
11066 fputs ("unord", file);
11069 fputs ("neq", file);
11072 fputs ("neq_oq", file);
11075 fputs ("ge", file);
11078 fputs ("nlt", file);
11081 fputs ("gt", file);
11084 fputs ("nle", file);
11087 fputs ("ord", file);
11090 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'D' (non-AVX flavor) -- smaller set of SSE comparison names.  */
11096 switch (GET_CODE (x))
11100 fputs ("eq", file);
11104 fputs ("lt", file);
11108 fputs ("le", file);
11111 fputs ("unord", file);
11115 fputs ("neq", file);
11119 fputs ("nlt", file);
11123 fputs ("nle", file);
11126 fputs ("ord", file);
11129 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O' -- Sun-assembler cmov size letter ("w."/"l."/"q.").  */
11135 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11136 if (ASSEMBLER_DIALECT == ASM_ATT)
11138 switch (GET_MODE (x))
11140 case HImode: putc ('w', file); break;
11142 case SFmode: putc ('l', file); break;
11144 case DFmode: putc ('q', file); break;
11145 default: gcc_unreachable ();
/* 'C' -- set/cmov condition suffix.  */
11152 if (!COMPARISON_P (x))
11154 output_operand_lossage ("operand is neither a constant nor a "
11155 "condition code, invalid operand code "
11159 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F' -- like 'C' but with the fcmov spelling.  */
11162 if (!COMPARISON_P (x))
11164 output_operand_lossage ("operand is neither a constant nor a "
11165 "condition code, invalid operand code "
11169 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11170 if (ASSEMBLER_DIALECT == ASM_ATT)
11173 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11176 /* Like above, but reverse condition */
11178 /* Check to see if argument to %c is really a constant
11179 and not a condition code which needs to be reversed. */
11180 if (!COMPARISON_P (x))
11182 output_operand_lossage ("operand is neither a constant nor a "
11183 "condition code, invalid operand "
11187 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
/* 'f' -- reversed fcmov condition.  */
11190 if (!COMPARISON_P (x))
11192 output_operand_lossage ("operand is neither a constant nor a "
11193 "condition code, invalid operand "
11197 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11198 if (ASSEMBLER_DIALECT == ASM_ATT)
11201 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e' -- compare-and-branch fused conditions, always in CCmode.  */
11205 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11209 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H' -- address offset by 8, for SSE high parts.  */
11213 /* It doesn't actually matter what mode we use here, as we're
11214 only going to use this for printing. */
11215 x = adjust_address_nv (x, DImode, 8);
/* '+' -- branch-prediction-hint prefix, emitted only when the static
   prediction disagrees with the CPU's default forward/backward rule.  */
11223 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11226 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11229 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is reasonably confident (outside
   the 45%-55% band).  */
11231 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11232 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11234 int taken = pred_val > REG_BR_PROB_BASE / 2;
11235 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11237 /* Emit hints only in the case default branch prediction
11238 heuristics would fail. */
11239 if (taken != cputaken)
11241 /* We use 3e (DS) prefix for taken branches and
11242 2e (CS) prefix for not taken branches. */
11244 fputs ("ds ; ", file);
11246 fputs ("cs ; ", file);
/* 'Y' -- SSE5 com* comparison names.  */
11254 switch (GET_CODE (x))
11257 fputs ("neq", file);
11260 fputs ("eq", file);
11264 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11268 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11272 fputs ("le", file);
11276 fputs ("lt", file);
11279 fputs ("unord", file);
11282 fputs ("ord", file);
11285 fputs ("ueq", file);
11288 fputs ("nlt", file);
11291 fputs ("nle", file);
11294 fputs ("ule", file);
11297 fputs ("ult", file);
11300 fputs ("une", file);
11303 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';' -- separator, works around a bug in older gas.  */
11310 fputs (" ; ", file);
11317 output_operand_lossage ("invalid operand code '%c'", code);
/* No modifier handled above: print X itself by its rtx class.  */
11322 print_reg (x, code, file);
11324 else if (MEM_P (x))
11326 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11327 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11328 && GET_MODE (x) != BLKmode)
11331 switch (GET_MODE_SIZE (GET_MODE (x)))
11333 case 1: size = "BYTE"; break;
11334 case 2: size = "WORD"; break;
11335 case 4: size = "DWORD"; break;
11336 case 8: size = "QWORD"; break;
11337 case 12: size = "XWORD"; break;
11339 if (GET_MODE (x) == XFmode)
11345 gcc_unreachable ();
11348 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11351 else if (code == 'w')
11353 else if (code == 'k')
11356 fputs (size, file);
11357 fputs (" PTR ", file);
11361 /* Avoid (%rip) for call operands. */
11362 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11363 && !CONST_INT_P (x))
11364 output_addr_const (file, x);
11365 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11366 output_operand_lossage ("invalid constraints for operand");
11368 output_address (x);
/* SFmode constants are emitted as their 32-bit bit pattern.  */
11371 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11376 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11377 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11379 if (ASSEMBLER_DIALECT == ASM_ATT)
11381 fprintf (file, "0x%08lx", (long unsigned int) l);
11384 /* These float cases don't actually occur as immediate operands. */
11385 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11389 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11390 fprintf (file, "%s", dstr);
11393 else if (GET_CODE (x) == CONST_DOUBLE
11394 && GET_MODE (x) == XFmode)
11398 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11399 fprintf (file, "%s", dstr);
11404 /* We have patterns that allow zero sets of memory, for instance.
11405 In 64-bit mode, we should probably support all 8-byte vectors,
11406 since we can in fact encode that into an immediate. */
11407 if (GET_CODE (x) == CONST_VECTOR)
11409 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates: '$' in AT&T syntax, "OFFSET FLAT:" for symbolic
   immediates in Intel syntax.  */
11415 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11417 if (ASSEMBLER_DIALECT == ASM_ATT)
11420 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11421 || GET_CODE (x) == LABEL_REF)
11423 if (ASSEMBLER_DIALECT == ASM_ATT)
11426 fputs ("OFFSET FLAT:", file);
11429 if (CONST_INT_P (x))
11430 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11432 output_pic_addr_const (file, x, code);
11434 output_addr_const (file, x);
11438 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE in the current assembler dialect.
   The address is first decomposed into base/index/displacement/scale/segment
   parts.  NOTE(review): elided listing -- AT&T and Intel emission paths are
   both present but many of their lines are missing.  */
11441 print_operand_address (FILE *file, rtx addr)
11443 struct ix86_address parts;
11444 rtx base, index, disp;
/* Decomposition must succeed for anything reaching final output.  */
11446 int ok = ix86_decompose_address (addr, &parts);
11451 index = parts.index;
11453 scale = parts.scale;
/* Emit an explicit segment override for %fs/%gs-based addresses.  */
11461 if (ASSEMBLER_DIALECT == ASM_ATT)
11463 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11466 gcc_unreachable ();
11469 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11470 if (TARGET_64BIT && !base && !index)
/* Strip a CONST (PLUS symbol const_int) wrapper to find the symbol.  */
11474 if (GET_CODE (disp) == CONST
11475 && GET_CODE (XEXP (disp, 0)) == PLUS
11476 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11477 symbol = XEXP (XEXP (disp, 0), 0);
/* RIP-relative form is usable for labels and non-TLS symbols only.  */
11479 if (GET_CODE (symbol) == LABEL_REF
11480 || (GET_CODE (symbol) == SYMBOL_REF
11481 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11484 if (!base && !index)
11486 /* Displacement only requires special attention. */
11488 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: for bare constant addresses.  */
11490 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11491 fputs ("ds:", file);
11492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11495 output_pic_addr_const (file, disp, 0);
11497 output_addr_const (file, disp);
/* General case, AT&T syntax: disp(base,index,scale).  */
11501 if (ASSEMBLER_DIALECT == ASM_ATT)
11506 output_pic_addr_const (file, disp, 0);
11507 else if (GET_CODE (disp) == LABEL_REF)
11508 output_asm_label (disp);
11510 output_addr_const (file, disp);
11515 print_reg (base, 0, file);
11519 print_reg (index, 0, file);
11521 fprintf (file, ",%d", scale);
/* General case, Intel syntax: [base+index*scale+disp].  */
11527 rtx offset = NULL_RTX;
11531 /* Pull out the offset of a symbol; print any symbol itself. */
11532 if (GET_CODE (disp) == CONST
11533 && GET_CODE (XEXP (disp, 0)) == PLUS
11534 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11536 offset = XEXP (XEXP (disp, 0), 1);
11537 disp = gen_rtx_CONST (VOIDmode,
11538 XEXP (XEXP (disp, 0), 0));
11542 output_pic_addr_const (file, disp, 0);
11543 else if (GET_CODE (disp) == LABEL_REF)
11544 output_asm_label (disp);
11545 else if (CONST_INT_P (disp))
11548 output_addr_const (file, disp);
11554 print_reg (base, 0, file);
11557 if (INTVAL (offset) >= 0)
11559 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11570 print_reg (index, 0, file);
11572 fprintf (file, "*%d", scale);
/* Target hook: print target-specific UNSPEC address constants (TLS
   relocations, Mach-O PIC offsets) to FILE.  Presumably returns false
   for anything that is not a recognized UNSPEC so generic code can
   handle it -- the return statements are elided from this listing.  */
11580 output_addr_const_extra (FILE *file, rtx x)
11584 if (GET_CODE (x) != UNSPEC)
11587 op = XVECEXP (x, 0, 0);
11588 switch (XINT (x, 1))
11590 case UNSPEC_GOTTPOFF:
11591 output_addr_const (file, op);
11592 /* FIXME: This might be @TPOFF in Sun ld. */
11593 fputs ("@GOTTPOFF", file);
11596 output_addr_const (file, op);
11597 fputs ("@TPOFF", file);
11599 case UNSPEC_NTPOFF:
11600 output_addr_const (file, op);
/* TPOFF spelling is used in 64-bit mode, NTPOFF in 32-bit mode --
   the surrounding TARGET_64BIT test is among the elided lines.  */
11602 fputs ("@TPOFF", file);
11604 fputs ("@NTPOFF", file);
11606 case UNSPEC_DTPOFF:
11607 output_addr_const (file, op);
11608 fputs ("@DTPOFF", file);
11610 case UNSPEC_GOTNTPOFF:
11611 output_addr_const (file, op);
11613 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11614 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11616 fputs ("@GOTNTPOFF", file);
11618 case UNSPEC_INDNTPOFF:
11619 output_addr_const (file, op);
11620 fputs ("@INDNTPOFF", file);
11623 case UNSPEC_MACHOPIC_OFFSET:
11624 output_addr_const (file, op);
11626 machopic_output_function_base_name (file);
11637 /* Split one or more DImode RTL references into pairs of SImode
11638 references. The RTL can be REG, offsettable MEM, integer constant, or
11639 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11640 split and "num" is its length. lo_half and hi_half are output arrays
11641 that parallel "operands". */
/* Split each of the NUM DImode values in OPERANDS into its low and high
   SImode halves, stored in the parallel arrays LO_HALF and HI_HALF.
   (The loop over NUM is among the elided lines.)  */
11644 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11648 rtx op = operands[num];
11650 /* simplify_subreg refuse to split volatile memory addresses,
11651 but we still have to handle it. */
/* Memory operands: address the two 4-byte halves directly.  */
11654 lo_half[num] = adjust_address (op, SImode, 0);
11655 hi_half[num] = adjust_address (op, SImode, 4);
/* Otherwise use subregs; VOIDmode constants are treated as DImode.  */
11659 lo_half[num] = simplify_gen_subreg (SImode, op,
11660 GET_MODE (op) == VOIDmode
11661 ? DImode : GET_MODE (op), 0);
11662 hi_half[num] = simplify_gen_subreg (SImode, op,
11663 GET_MODE (op) == VOIDmode
11664 ? DImode : GET_MODE (op), 4);
11668 /* Split one or more TImode RTL references into pairs of DImode
11669 references. The RTL can be REG, offsettable MEM, integer constant, or
11670 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11671 split and "num" is its length. lo_half and hi_half are output arrays
11672 that parallel "operands". */
/* TImode counterpart of split_di: split each of the NUM TImode values
   in OPERANDS into DImode low/high halves in LO_HALF and HI_HALF.  */
11675 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11679 rtx op = operands[num];
11681 /* simplify_subreg refuse to split volatile memory addresses, but we
11682 still have to handle it. */
/* Memory operands: address the two 8-byte halves directly.  */
11685 lo_half[num] = adjust_address (op, DImode, 0);
11686 hi_half[num] = adjust_address (op, DImode, 8);
11690 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11691 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11696 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11697 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11698 is the expression of the binary operation. The output may either be
11699 emitted here, or returned to the caller, like all output_* functions.
11701 There is no guarantee that the operands are the same mode, as they
11702 might be within FLOAT or FLOAT_EXTEND expressions. */
11704 #ifndef SYSV386_COMPAT
11705 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11706 wants to fix the assemblers because that causes incompatibility
11707 with gcc. No-one wants to fix gcc because that causes
11708 incompatibility with assemblers... You can use the option of
11709 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11710 #define SYSV386_COMPAT 1
/* Produce the assembler template for the x87/SSE binary FP operation in
   INSN (PLUS, MINUS, MULT or DIV; operands[3] holds the operation rtx).
   Returns a pointer to a static buffer.  NOTE(review): elided listing --
   the opcode-name assignments (p/ssep) in the first switch and several
   control-flow lines are missing.  */
11714 output_387_binary_op (rtx insn, rtx *operands)
/* Static: the returned template must outlive this call.  */
11716 static char buf[40];
11719 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11721 #ifdef ENABLE_CHECKING
11722 /* Even if we do not want to check the inputs, this documents input
11723 constraints. Which helps in understanding the following code. */
11724 if (STACK_REG_P (operands[0])
11725 && ((REG_P (operands[1])
11726 && REGNO (operands[0]) == REGNO (operands[1])
11727 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11728 || (REG_P (operands[2])
11729 && REGNO (operands[0]) == REGNO (operands[2])
11730 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11731 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11734 gcc_assert (is_sse);
/* Pick the base mnemonic from the operation code; integer-mode
   operands select the fi* (integer-operand) x87 forms.  */
11737 switch (GET_CODE (operands[3]))
11740 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11741 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11749 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11750 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11758 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11759 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11767 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11768 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11776 gcc_unreachable ();
/* SSE path: three-operand AVX form vs. two-operand legacy form,
   with "ss"/"sd" suffix chosen by operand mode.  */
11783 strcpy (buf, ssep);
11784 if (GET_MODE (operands[0]) == SFmode)
11785 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11787 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11791 strcpy (buf, ssep + 1);
11792 if (GET_MODE (operands[0]) == SFmode)
11793 strcat (buf, "ss\t{%2, %0|%0, %2}");
11795 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: select the operand/popping template P per operation.  */
11801 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11805 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11807 rtx temp = operands[2];
11808 operands[2] = operands[1];
11809 operands[1] = temp;
11812 /* know operands[0] == operands[1]. */
11814 if (MEM_P (operands[2]))
11820 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11822 if (STACK_TOP_P (operands[0]))
11823 /* How is it that we are storing to a dead operand[2]?
11824 Well, presumably operands[1] is dead too. We can't
11825 store the result to st(0) as st(0) gets popped on this
11826 instruction. Instead store to operands[2] (which I
11827 think has to be st(1)). st(1) will be popped later.
11828 gcc <= 2.8.1 didn't have this check and generated
11829 assembly code that the Unixware assembler rejected. */
11830 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11832 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11836 if (STACK_TOP_P (operands[0]))
11837 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11839 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters and the
   reversed (fsubr/fdivr) forms come into play.  */
11844 if (MEM_P (operands[1]))
11850 if (MEM_P (operands[2]))
11856 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11859 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11860 derived assemblers, confusingly reverse the direction of
11861 the operation for fsub{r} and fdiv{r} when the
11862 destination register is not st(0). The Intel assembler
11863 doesn't have this brain damage. Read !SYSV386_COMPAT to
11864 figure out what the hardware really does. */
11865 if (STACK_TOP_P (operands[0]))
11866 p = "{p\t%0, %2|rp\t%2, %0}";
11868 p = "{rp\t%2, %0|p\t%0, %2}";
11870 if (STACK_TOP_P (operands[0]))
11871 /* As above for fmul/fadd, we can't store to st(0). */
11872 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11874 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11879 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11882 if (STACK_TOP_P (operands[0]))
11883 p = "{rp\t%0, %1|p\t%1, %0}";
11885 p = "{p\t%1, %0|rp\t%0, %1}";
11887 if (STACK_TOP_P (operands[0]))
11888 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11890 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11895 if (STACK_TOP_P (operands[0]))
11897 if (STACK_TOP_P (operands[1]))
11898 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11900 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11903 else if (STACK_TOP_P (operands[1]))
11906 p = "{\t%1, %0|r\t%0, %1}";
11908 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11914 p = "{r\t%2, %0|\t%0, %2}";
11916 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11922 gcc_unreachable ();
11929 /* Return needed mode for entity in optimize_mode_switching pass. */
/* For the i387 control-word mode-switching entity, return which rounding
   mode INSN requires (I387_CW_*), or I387_CW_ANY when it has no
   requirement / I387_CW_UNINITIALIZED after calls and asms.  */
11932 ix86_mode_needed (int entity, rtx insn)
11934 enum attr_i387_cw mode;
11936 /* The mode UNINITIALIZED is used to store control word after a
11937 function call or ASM pattern. The mode ANY specify that function
11938 has no requirements on the control word and make no changes in the
11939 bits we are interested in. */
11942 || (NONJUMP_INSN_P (insn)
11943 && (asm_noperands (PATTERN (insn)) >= 0
11944 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11945 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns (notes, etc.) impose no requirement.  */
11947 if (recog_memoized (insn) < 0)
11948 return I387_CW_ANY;
11950 mode = get_attr_i387_cw (insn);
/* NOTE(review): the tests below are presumably gated on the matching
   X86_TUNE/flag conditions on the elided lines in between.  */
11955 if (mode == I387_CW_TRUNC)
11960 if (mode == I387_CW_FLOOR)
11965 if (mode == I387_CW_CEIL)
11970 if (mode == I387_CW_MASK_PM)
11975 gcc_unreachable ();
11978 return I387_CW_ANY;
11981 /* Output code to initialize control word copies used by trunc?f?i and
11982 rounding patterns. CURRENT_MODE is set to current control word,
11983 while NEW_MODE is set to new control word. */
/* Emit insns that build the modified i387 control word for rounding MODE
   (one of I387_CW_TRUNC/FLOOR/CEIL/MASK_PM) from the saved control word,
   and store it into the matching stack slot for later fldcw.  */
11986 emit_i387_cw_initialization (int mode)
11988 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11991 enum ix86_stack_slot slot;
11993 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word and copy it into a pseudo.  */
11995 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11996 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow-partial-register / size-optimized path: manipulate the HImode
   value directly with and/or.  */
11998 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11999 || optimize_function_for_size_p (cfun))
12003 case I387_CW_TRUNC:
12004 /* round toward zero (truncate) */
12005 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12006 slot = SLOT_CW_TRUNC;
12009 case I387_CW_FLOOR:
12010 /* round down toward -oo */
12011 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12012 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12013 slot = SLOT_CW_FLOOR;
12017 /* round up toward +oo */
12018 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12019 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12020 slot = SLOT_CW_CEIL;
12023 case I387_CW_MASK_PM:
12024 /* mask precision exception for nearbyint() */
12025 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12026 slot = SLOT_CW_MASK_PM;
12030 gcc_unreachable ();
/* Alternative path: insert the 2-bit rounding-control field with a
   single insv, avoiding partial-register operations.  */
12037 case I387_CW_TRUNC:
12038 /* round toward zero (truncate) */
12039 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12040 slot = SLOT_CW_TRUNC;
12043 case I387_CW_FLOOR:
12044 /* round down toward -oo */
12045 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12046 slot = SLOT_CW_FLOOR;
12050 /* round up toward +oo */
12051 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12052 slot = SLOT_CW_CEIL;
12055 case I387_CW_MASK_PM:
12056 /* mask precision exception for nearbyint() */
12057 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12058 slot = SLOT_CW_MASK_PM;
12062 gcc_unreachable ();
12066 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Store the modified control word into its dedicated stack slot.  */
12068 new_mode = assign_386_stack_local (HImode, slot);
12069 emit_move_insn (new_mode, reg);
12072 /* Output code for INSN to convert a float to a signed int. OPERANDS
12073 are the insn operands. The output may be [HSD]Imode and the input
12074 operand may be [SDX]Fmode. */
/* Produce assembler for INSN converting an x87 float to a signed integer.
   FISTTP non-zero uses the SSE3 fisttp instruction (truncating, no
   control-word dance).  Output is [HSD]Imode; input may be [SDX]Fmode.  */
12077 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12079 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12080 int dimode_p = GET_MODE (operands[0]) == DImode;
12081 int round_mode = get_attr_i387_cw (insn);
12083 /* Jump through a hoop or two for DImode, since the hardware has no
12084 non-popping instruction. We used to do this a different way, but
12085 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
12086 if ((dimode_p || fisttp) && !stack_top_dies)
12087 output_asm_insn ("fld\t%y1", operands);
12089 gcc_assert (STACK_TOP_P (operands[1]));
12090 gcc_assert (MEM_P (operands[0]));
12091 gcc_assert (GET_MODE (operands[1]) != TFmode);
12094 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: switch the control word to the required rounding mode
   around the store, then restore it.  */
12097 if (round_mode != I387_CW_ANY)
12098 output_asm_insn ("fldcw\t%3", operands);
12099 if (stack_top_dies || dimode_p)
12100 output_asm_insn ("fistp%z0\t%0", operands);
12102 output_asm_insn ("fist%z0\t%0", operands);
12103 if (round_mode != I387_CW_ANY)
12104 output_asm_insn ("fldcw\t%2", operands);
12110 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12111 have the values zero or one, indicates the ffreep insn's operand
12112 from the OPERANDS array. */
/* Return the assembler template that pops x87 register OPERANDS[OPNO]
   (which must be 0 or 1).  Uses ffreep when the target and assembler
   support it; otherwise emits the raw 0xdf 0xc0+i opcode bytes, or
   falls back to a plain fstp.  */
12114 static const char *
12115 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12117 if (TARGET_USE_FFREEP)
12118 #if HAVE_AS_IX86_FFREEP
12119 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: patch the register number directly into the
   ".word 0xc?df" encoding (the '_' placeholder at index 9).  */
12122 static char retval[] = ".word\t0xc_df";
12123 int regno = REGNO (operands[opno]);
12125 gcc_assert (FP_REGNO_P (regno));
12127 retval[9] = '0' + (regno - FIRST_STACK_REG);
12132 return opno ? "fstp\t%y1" : "fstp\t%y0";
12136 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12137 should be used. UNORDERED_P is true when fucom should be used. */
/* Produce assembler for the FP comparison in INSN.  EFLAGS_P selects the
   fcomi family (result straight to eflags); UNORDERED_P selects the
   quiet fucom family.  SSE operands get [v]ucomis[sd]/[v]comis[sd].  */
12140 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12142 int stack_top_dies;
12143 rtx cmp_op0, cmp_op1;
12144 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the two values actually being compared; their operand indices
   depend on the pattern shape (the test is on an elided line).  */
12148 cmp_op0 = operands[0];
12149 cmp_op1 = operands[1];
12153 cmp_op0 = operands[1];
12154 cmp_op1 = operands[2];
/* SSE comparisons: return the template, skipping the leading 'v' for
   non-AVX targets.  */
12159 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12160 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12161 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12162 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12164 if (GET_MODE (operands[0]) == SFmode)
12166 return &ucomiss[TARGET_AVX ? 0 : 1];
12168 return &comiss[TARGET_AVX ? 0 : 1];
12171 return &ucomisd[TARGET_AVX ? 0 : 1];
12173 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: the first compare operand must already be in st(0).  */
12176 gcc_assert (STACK_TOP_P (cmp_op0));
12178 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping afterwards if st(0) dies.  */
12180 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12182 if (stack_top_dies)
12184 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12185 return output_387_ffreep (operands, 1);
12188 return "ftst\n\tfnstsw\t%0";
12191 if (STACK_REG_P (cmp_op1)
12193 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12194 && REGNO (cmp_op1) != FIRST_STACK_REG)
12196 /* If both the top of the 387 stack dies, and the other operand
12197 is also a stack register that dies, then this must be a
12198 `fcompp' float compare */
12202 /* There is no double popping fcomi variant. Fortunately,
12203 eflags is immune from the fstp's cc clobbering. */
12205 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12207 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12208 return output_387_ffreep (operands, 0);
12213 return "fucompp\n\tfnstsw\t%0";
12215 return "fcompp\n\tfnstsw\t%0";
12220 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12222 static const char * const alt[16] =
12224 "fcom%z2\t%y2\n\tfnstsw\t%0",
12225 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12226 "fucom%z2\t%y2\n\tfnstsw\t%0",
12227 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12229 "ficom%z2\t%y2\n\tfnstsw\t%0",
12230 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12234 "fcomi\t{%y1, %0|%0, %y1}",
12235 "fcomip\t{%y1, %0|%0, %y1}",
12236 "fucomi\t{%y1, %0|%0, %y1}",
12237 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index described by the comment above and pick the
   matching template (NULL entries are unreachable encodings).  */
12248 mask = eflags_p << 3;
12249 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12250 mask |= unordered_p << 1;
12251 mask |= stack_top_dies;
12253 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry referring to local label VALUE:
   .quad for 64-bit jump tables, .long otherwise.  */
12262 ix86_output_addr_vec_elt (FILE *file, int value)
12264 const char *directive = ASM_LONG;
12268 directive = ASM_QUAD;
/* ASM_QUAD is unavailable here, so 64-bit mode must not reach this.  */
12270 gcc_assert (!TARGET_64BIT);
12273 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry: the difference between local label
   VALUE and the reference point REL (or a @GOTOFF/PIC-base form for
   32-bit PIC code).  */
12277 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12279 const char *directive = ASM_LONG;
12282 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12283 directive = ASM_QUAD;
12285 gcc_assert (!TARGET_64BIT);
12287 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12288 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12289 fprintf (file, "%s%s%d-%s%d\n",
12290 directive, LPREFIX, value, LPREFIX, rel);
12291 else if (HAVE_AS_GOTOFF_IN_DATA)
12292 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12294 else if (TARGET_MACHO)
/* Mach-O: offset from the function's PIC base.  */
12296 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12297 machopic_output_function_base_name (file);
12298 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12302 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12303 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12306 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit either "mov $0, DEST" or the shorter "xor DEST, DEST" to zero
   register DEST, choosing by tuning.  Post-reload only.  */
12310 ix86_expand_clear (rtx dest)
12314 /* We play register width games, which are only valid after reload. */
12315 gcc_assert (reload_completed);
12317 /* Avoid HImode and its attendant prefix byte. */
12318 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12319 dest = gen_rtx_REG (SImode, REGNO (dest));
12320 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12322 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12323 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor clobbers the flags, so attach an explicit FLAGS_REG clobber.  */
12325 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12326 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12332 /* X is an unchanging MEM. If it is a constant pool reference, return
12333 the constant pool rtx, else NULL. */
/* X is an unchanging MEM.  If its (delegitimized) address is a constant
   pool reference, return the pool constant; otherwise NULL (the NULL
   return is on an elided line).  */
12336 maybe_get_pool_constant (rtx x)
12338 x = ix86_delegitimize_address (XEXP (x, 0));
12340 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12341 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport, PIC and constant-pool references as
   needed before emitting the final SET.  NOTE(review): many physical
   lines are elided in this extraction; the if/else nesting shown is
   incomplete.  */
12347 ix86_expand_move (enum machine_mode mode, rtx operands[])
12350 enum tls_model model;
/* Bare symbol source: handle TLS and dllimport symbols first.  */
12355 if (GET_CODE (op1) == SYMBOL_REF)
12357 model = SYMBOL_REF_TLS_MODEL (op1);
12360 op1 = legitimize_tls_address (op1, model, true);
12361 op1 = force_operand (op1, op0);
12365 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12366 && SYMBOL_REF_DLLIMPORT_P (op1))
12367 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus SYMBOL addend)): legitimize the symbol, then re-add
   the addend.  */
12369 else if (GET_CODE (op1) == CONST
12370 && GET_CODE (XEXP (op1, 0)) == PLUS
12371 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12373 rtx addend = XEXP (XEXP (op1, 0), 1);
12374 rtx symbol = XEXP (XEXP (op1, 0), 0);
12377 model = SYMBOL_REF_TLS_MODEL (symbol);
12379 tmp = legitimize_tls_address (symbol, model, true);
12380 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12381 && SYMBOL_REF_DLLIMPORT_P (symbol))
12382 tmp = legitimize_dllimport_symbol (symbol, true);
12386 tmp = force_operand (tmp, NULL);
12387 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12388 op0, 1, OPTAB_DIRECT);
/* PIC addressing of symbolic operands.  */
12394 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12396 if (TARGET_MACHO && !TARGET_64BIT)
/* Reuse op0 as scratch when pseudos are unavailable (reload).  */
12401 rtx temp = ((reload_in_progress
12402 || ((op0 && REG_P (op0))
12404 ? op0 : gen_reg_rtx (Pmode));
12405 op1 = machopic_indirect_data_reference (op1, temp);
12406 op1 = machopic_legitimize_pic_address (op1, mode,
12407 temp == op1 ? 0 : temp);
12409 else if (MACHOPIC_INDIRECT)
12410 op1 = machopic_indirect_data_reference (op1, 0);
12418 op1 = force_reg (Pmode, op1);
12419 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12421 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12422 op1 = legitimize_pic_address (op1, reg);
12431 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12432 || !push_operand (op0, mode))
12434 op1 = force_reg (mode, op1);
/* Pushed operands must survive stack-pointer elimination.  */
12436 if (push_operand (op0, mode)
12437 && ! general_no_elim_operand (op1, mode))
12438 op1 = copy_to_mode_reg (mode, op1);
12440 /* Force large constants in 64bit compilation into register
12441 to get them CSEed. */
12442 if (can_create_pseudo_p ()
12443 && (mode == DImode) && TARGET_64BIT
12444 && immediate_operand (op1, mode)
12445 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12446 && !register_operand (op0, mode)
12448 op1 = copy_to_mode_reg (mode, op1);
12450 if (can_create_pseudo_p ()
12451 && FLOAT_MODE_P (mode)
12452 && GET_CODE (op1) == CONST_DOUBLE)
12454 /* If we are loading a floating point constant to a register,
12455 force the value to memory now, since we'll get better code
12456 out the back end. */
12458 op1 = validize_mem (force_const_mem (mode, op1));
12459 if (!register_operand (op0, mode))
/* mem->mem move: go through a temporary register.  */
12461 rtx temp = gen_reg_rtx (mode);
12462 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12463 emit_move_insn (op0, temp);
/* Finally emit the (possibly legitimized) move.  */
12469 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move of MODE, forcing non-trivial constants to the
   constant pool and routing under-aligned SSE memory operands through
   ix86_expand_vector_move_misalign.  NOTE(review): some lines are
   elided in this extraction.  */
12473 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12475 rtx op0 = operands[0], op1 = operands[1];
12476 unsigned int align = GET_MODE_ALIGNMENT (mode);
12478 /* Force constants other than zero into memory. We do not know how
12479 the instructions used to build constants modify the upper 64 bits
12480 of the register, once we have that information we may be able
12481 to handle some of them more efficiently. */
12482 if (can_create_pseudo_p ()
12483 && register_operand (op0, mode)
12484 && (CONSTANT_P (op1)
12485 || (GET_CODE (op1) == SUBREG
12486 && CONSTANT_P (SUBREG_REG (op1))))
12487 && standard_sse_constant_p (op1) <= 0)
12488 op1 = validize_mem (force_const_mem (mode, op1));
12490 /* We need to check memory alignment for SSE mode since attribute
12491 can make operands unaligned. */
12492 if (can_create_pseudo_p ()
12493 && SSE_REG_MODE_P (mode)
12494 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12495 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12499 /* ix86_expand_vector_move_misalign() does not like constants ... */
12500 if (CONSTANT_P (op1)
12501 || (GET_CODE (op1) == SUBREG
12502 && CONSTANT_P (SUBREG_REG (op1))))
12503 op1 = validize_mem (force_const_mem (mode, op1));
12505 /* ... nor both arguments in memory. */
12506 if (!register_operand (op0, mode)
12507 && !register_operand (op1, mode))
12508 op1 = force_reg (mode, op1);
12510 tmp[0] = op0; tmp[1] = op1;
12511 ix86_expand_vector_move_misalign (mode, tmp);
12515 /* Make operand1 a register if it isn't already. */
12516 if (can_create_pseudo_p ()
12517 && !register_operand (op0, mode)
12518 && !register_operand (op1, mode))
12520 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12524 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12527 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12528 straight to ix86_expand_vector_move. */
12529 /* Code generation for scalar reg-reg moves of single and double precision data:
12530 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12534 if (x86_sse_partial_reg_dependency == true)
12539 Code generation for scalar loads of double precision data:
12540 if (x86_sse_split_regs == true)
12541 movlpd mem, reg (gas syntax)
12545 Code generation for unaligned packed loads of single precision data
12546 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12547 if (x86_sse_unaligned_move_optimal)
12550 if (x86_sse_partial_reg_dependency == true)
12562 Code generation for unaligned packed loads of double precision data
12563 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12564 if (x86_sse_unaligned_move_optimal)
12567 if (x86_sse_split_regs == true)
/* Emit an unaligned vector load or store, choosing the instruction
   sequence per the tuning table above.  NOTE(review): the extraction
   elides lines, so case labels and some branches are missing from
   view; structure below is incomplete as shown.  */
12580 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12589 switch (GET_MODE_CLASS (mode))
/* AVX path: unaligned moves are efficient; pick by size/class.  */
12591 case MODE_VECTOR_INT:
12593 switch (GET_MODE_SIZE (mode))
12596 op0 = gen_lowpart (V16QImode, op0);
12597 op1 = gen_lowpart (V16QImode, op1);
12598 emit_insn (gen_avx_movdqu (op0, op1));
12601 op0 = gen_lowpart (V32QImode, op0);
12602 op1 = gen_lowpart (V32QImode, op1);
12603 emit_insn (gen_avx_movdqu256 (op0, op1));
12606 gcc_unreachable ();
12609 case MODE_VECTOR_FLOAT:
12610 op0 = gen_lowpart (mode, op0);
12611 op1 = gen_lowpart (mode, op1);
12616 emit_insn (gen_avx_movups (op0, op1));
12619 emit_insn (gen_avx_movups256 (op0, op1));
12622 emit_insn (gen_avx_movupd (op0, op1));
12625 emit_insn (gen_avx_movupd256 (op0, op1));
12628 gcc_unreachable ();
12633 gcc_unreachable ();
/* Non-AVX, op1 in memory (load side).  */
12641 /* If we're optimizing for size, movups is the smallest. */
12642 if (optimize_insn_for_size_p ())
12644 op0 = gen_lowpart (V4SFmode, op0);
12645 op1 = gen_lowpart (V4SFmode, op1);
12646 emit_insn (gen_sse_movups (op0, op1));
12650 /* ??? If we have typed data, then it would appear that using
12651 movdqu is the only way to get unaligned data loaded with
12653 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12655 op0 = gen_lowpart (V16QImode, op0);
12656 op1 = gen_lowpart (V16QImode, op1);
12657 emit_insn (gen_sse2_movdqu (op0, op1));
12661 if (TARGET_SSE2 && mode == V2DFmode)
12665 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12667 op0 = gen_lowpart (V2DFmode, op0);
12668 op1 = gen_lowpart (V2DFmode, op1);
12669 emit_insn (gen_sse2_movupd (op0, op1));
12673 /* When SSE registers are split into halves, we can avoid
12674 writing to the top half twice. */
12675 if (TARGET_SSE_SPLIT_REGS)
12677 emit_clobber (op0);
12682 /* ??? Not sure about the best option for the Intel chips.
12683 The following would seem to satisfy; the register is
12684 entirely cleared, breaking the dependency chain. We
12685 then store to the upper half, with a dependency depth
12686 of one. A rumor has it that Intel recommends two movsd
12687 followed by an unpacklpd, but this is unconfirmed. And
12688 given that the dependency depth of the unpacklpd would
12689 still be one, I'm not sure why this would be better. */
12690 zero = CONST0_RTX (V2DFmode);
/* Load low then high halves separately.  */
12693 m = adjust_address (op1, DFmode, 0);
12694 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12695 m = adjust_address (op1, DFmode, 8);
12696 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12700 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12702 op0 = gen_lowpart (V4SFmode, op0);
12703 op1 = gen_lowpart (V4SFmode, op1);
12704 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0 before the half loads.  */
12708 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12709 emit_move_insn (op0, CONST0_RTX (mode));
12711 emit_clobber (op0);
12713 if (mode != V4SFmode)
12714 op0 = gen_lowpart (V4SFmode, op0);
12715 m = adjust_address (op1, V2SFmode, 0);
12716 emit_insn (gen_sse_loadlps (op0, op0, m));
12717 m = adjust_address (op1, V2SFmode, 8);
12718 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX, op0 in memory (store side).  */
12721 else if (MEM_P (op0))
12723 /* If we're optimizing for size, movups is the smallest. */
12724 if (optimize_insn_for_size_p ())
12726 op0 = gen_lowpart (V4SFmode, op0);
12727 op1 = gen_lowpart (V4SFmode, op1);
12728 emit_insn (gen_sse_movups (op0, op1));
12732 /* ??? Similar to above, only less clear because of quote
12733 typeless stores unquote. */
12734 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12735 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12737 op0 = gen_lowpart (V16QImode, op0);
12738 op1 = gen_lowpart (V16QImode, op1);
12739 emit_insn (gen_sse2_movdqu (op0, op1));
12743 if (TARGET_SSE2 && mode == V2DFmode)
/* Store low and high DF halves separately.  */
12745 m = adjust_address (op0, DFmode, 0);
12746 emit_insn (gen_sse2_storelpd (m, op1));
12747 m = adjust_address (op0, DFmode, 8);
12748 emit_insn (gen_sse2_storehpd (m, op1));
12752 if (mode != V4SFmode)
12753 op1 = gen_lowpart (V4SFmode, op1);
12754 m = adjust_address (op0, V2SFmode, 0);
12755 emit_insn (gen_sse_storelps (m, op1));
12756 m = adjust_address (op0, V2SFmode, 8);
12757 emit_insn (gen_sse_storehps (m, op1));
12761 gcc_unreachable ();
12764 /* Expand a push in MODE. This is some mode for which we do not support
12765 proper push instructions, at least from the registers that we expect
12766 the value to live in. */
12769 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the operand size...  */
12773 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12774 GEN_INT (-GET_MODE_SIZE (mode)),
12775 stack_pointer_rtx, 1, OPTAB_DIRECT);
12776 if (tmp != stack_pointer_rtx)
12777 emit_move_insn (stack_pointer_rtx, tmp);
/* ...then store X through the new stack pointer.  */
12779 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12781 /* When we push an operand onto stack, it has to be aligned at least
12782 at the function argument boundary. However since we don't have
12783 the argument type, we can't determine the actual argument
12785 emit_move_insn (tmp, x);
12788 /* Helper function of ix86_fixup_binary_operands to canonicalize
12789 operand order. Returns true if the operands should be swapped. */
12792 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12795 rtx dst = operands[0];
12796 rtx src1 = operands[1];
12797 rtx src2 = operands[2];
12799 /* If the operation is not commutative, we can't do anything. */
12800 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12803 /* Highest priority is that src1 should match dst. */
12804 if (rtx_equal_p (dst, src1))
12806 if (rtx_equal_p (dst, src2))
12809 /* Next highest priority is that immediate constants come second. */
12810 if (immediate_operand (src2, mode))
12812 if (immediate_operand (src1, mode))
12815 /* Lowest priority is that memory references should come second. */
12825 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12826 destination to use for the operation. If different from the true
12827 destination in operands[0], a copy operation will be required. */
12830 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12833 rtx dst = operands[0];
12834 rtx src1 = operands[1];
12835 rtx src2 = operands[2];
12837 /* Canonicalize operand order. */
12838 if (ix86_swap_binary_operands_p (code, mode, operands))
12842 /* It is invalid to swap operands of different modes. */
12843 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12850 /* Both source operands cannot be in memory. */
12851 if (MEM_P (src1) && MEM_P (src2))
12853 /* Optimization: Only read from memory once. */
12854 if (rtx_equal_p (src1, src2))
12856 src2 = force_reg (mode, src2);
12860 src2 = force_reg (mode, src2);
12863 /* If the destination is memory, and we do not have matching source
12864 operands, do things in registers. */
12865 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12866 dst = gen_reg_rtx (mode);
12868 /* Source 1 cannot be a constant. */
12869 if (CONSTANT_P (src1))
12870 src1 = force_reg (mode, src1);
12872 /* Source 1 cannot be a non-matching memory. */
12873 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12874 src1 = force_reg (mode, src1);
/* Write back the (possibly legitimized) sources for the caller.  */
12876 operands[1] = src1;
12877 operands[2] = src2;
12881 /* Similarly, but assume that the destination has already been
12882 set up properly. */
12885 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12886 enum machine_mode mode, rtx operands[])
12888 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
/* The caller guarantees no destination copy is needed.  */
12889 gcc_assert (dst == operands[0]);
12892 /* Attempt to expand a binary operator. Make the expansion closer to the
12893 actual machine, then just general_operand, which will allow 3 separate
12894 memory references (one output, two input) in a single insn. */
12897 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12900 rtx src1, src2, dst, op, clob;
12902 dst = ix86_fixup_binary_operands (code, mode, operands);
12903 src1 = operands[1];
12904 src2 = operands[2];
12906 /* Emit the instruction. */
12908 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12909 if (reload_in_progress)
12911 /* Reload doesn't know about the flags register, and doesn't know that
12912 it doesn't want to clobber it. We can only do this with PLUS. */
12913 gcc_assert (code == PLUS);
/* Normal case: arithmetic insns clobber the flags register.  */
12918 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12919 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12922 /* Fix up the destination if needed. */
12923 if (dst != operands[0])
12924 emit_move_insn (operands[0], dst);
12927 /* Return TRUE or FALSE depending on whether the binary operator meets the
12928 appropriate constraints. */
12931 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12934 rtx dst = operands[0];
12935 rtx src1 = operands[1];
12936 rtx src2 = operands[2];
12938 /* Both source operands cannot be in memory. */
12939 if (MEM_P (src1) && MEM_P (src2))
12942 /* Canonicalize operand order for commutative operators. */
12943 if (ix86_swap_binary_operands_p (code, mode, operands))
12950 /* If the destination is memory, we must have a matching source operand. */
12951 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12954 /* Source 1 cannot be a constant. */
12955 if (CONSTANT_P (src1))
12958 /* Source 1 cannot be a non-matching memory. */
12959 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12965 /* Attempt to expand a unary operator. Make the expansion closer to the
12966 actual machine, then just general_operand, which will allow 2 separate
12967 memory references (one output, one input) in a single insn. */
12970 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12973 int matching_memory;
12974 rtx src, dst, op, clob;
12979 /* If the destination is memory, and we do not have matching source
12980 operands, do things in registers. */
12981 matching_memory = 0;
12984 if (rtx_equal_p (dst, src))
12985 matching_memory = 1;
12987 dst = gen_reg_rtx (mode);
12990 /* When source operand is memory, destination must match. */
12991 if (MEM_P (src) && !matching_memory)
12992 src = force_reg (mode, src);
12994 /* Emit the instruction. */
12996 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12997 if (reload_in_progress || code == NOT)
12999 /* Reload doesn't know about the flags register, and doesn't know that
13000 it doesn't want to clobber it. */
13001 gcc_assert (code == NOT);
/* Other unary ops (e.g. NEG) clobber the flags register.  */
13006 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13010 /* Fix up the destination if needed. */
13011 if (dst != operands[0])
13012 emit_move_insn (operands[0], dst);
13015 #define LEA_SEARCH_THRESHOLD 12
13017 /* Search backward for non-agu definition of register number REGNO1
13018 or register number REGNO2 in INSN's basic block until
13019 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13020 2. Reach BB boundary, or
13021 3. Reach agu definition.
13022 Returns the distance between the non-agu definition point and INSN.
13023 If no definition point, returns -1. */
13026 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13029 basic_block bb = BLOCK_FOR_INSN (insn);
13032 enum attr_type insn_type;
/* Phase 1: walk backward within this basic block.  */
13034 if (insn != BB_HEAD (bb))
13036 rtx prev = PREV_INSN (insn);
13037 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Scan the DF defs of PREV for a write to either register.  */
13042 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13043 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13044 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13045 && (regno1 == DF_REF_REGNO (*def_rec)
13046 || regno2 == DF_REF_REGNO (*def_rec)))
13048 insn_type = get_attr_type (prev);
13049 if (insn_type != TYPE_LEA)
13053 if (prev == BB_HEAD (bb))
13055 prev = PREV_INSN (prev);
/* Phase 2: if the budget isn't exhausted and the block is a
   single-block loop (a predecessor edge from itself), continue the
   search from the block's end.  */
13059 if (distance < LEA_SEARCH_THRESHOLD)
13063 bool simple_loop = false;
13065 FOR_EACH_EDGE (e, ei, bb->preds)
13068 simple_loop = true;
13074 rtx prev = BB_END (bb);
13077 && distance < LEA_SEARCH_THRESHOLD)
13082 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13083 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13084 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13085 && (regno1 == DF_REF_REGNO (*def_rec)
13086 || regno2 == DF_REF_REGNO (*def_rec)))
13088 insn_type = get_attr_type (prev);
13089 if (insn_type != TYPE_LEA)
13093 prev = PREV_INSN (prev);
13101 /* get_attr_type may modify recog data. We want to make sure
13102 that recog data is valid for instruction INSN, on which
13103 distance_non_agu_define is called. INSN is unchanged here. */
13104 extract_insn_cached (insn);
13108 /* Return the distance between INSN and the next insn that uses
13109 register number REGNO0 in memory address. Return -1 if no such
13110 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13113 distance_agu_use (unsigned int regno0, rtx insn)
13115 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: scan forward within this basic block.  */
13120 if (insn != BB_END (bb))
13122 rtx next = NEXT_INSN (insn);
13123 while (next && distance < LEA_SEARCH_THRESHOLD)
/* An address-forming use of REGNO0 terminates the search
   successfully.  */
13129 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13130 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13131 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13132 && regno0 == DF_REF_REGNO (*use_rec))
13134 /* Return DISTANCE if OP0 is used in memory
13135 address in NEXT. */
/* A redefinition of REGNO0 kills the value; give up.  */
13139 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13140 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13141 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13142 && regno0 == DF_REF_REGNO (*def_rec))
13144 /* Return -1 if OP0 is set in NEXT. */
13148 if (next == BB_END (bb))
13150 next = NEXT_INSN (next);
/* Phase 2: if the block loops back to itself, continue the scan
   from its head with the remaining budget.  */
13154 if (distance < LEA_SEARCH_THRESHOLD)
13158 bool simple_loop = false;
13160 FOR_EACH_EDGE (e, ei, bb->succs)
13163 simple_loop = true;
13169 rtx next = BB_HEAD (bb);
13172 && distance < LEA_SEARCH_THRESHOLD)
13178 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13179 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13180 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13181 && regno0 == DF_REF_REGNO (*use_rec))
13183 /* Return DISTANCE if OP0 is used in memory
13184 address in NEXT. */
13188 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13189 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13190 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13191 && regno0 == DF_REF_REGNO (*def_rec))
13193 /* Return -1 if OP0 is set in NEXT. */
13198 next = NEXT_INSN (next);
13206 /* Define this macro to tune LEA priority vs ADD, it take effect when
13207 there is a dilemma of choicing LEA or ADD
13208 Negative value: ADD is more preferred than LEA
13210 Positive value: LEA is more preferred than ADD*/
13211 #define IX86_LEA_PRIORITY 2
13213 /* Return true if it is ok to optimize an ADD operation to LEA
13214 operation to avoid flag register consumation. For the processors
13215 like ATOM, if the destination register of LEA holds an actual
13216 address which will be used soon, LEA is better and otherwise ADD
13220 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13221 rtx insn, rtx operands[])
13223 unsigned int regno0 = true_regnum (operands[0]);
13224 unsigned int regno1 = true_regnum (operands[1]);
13225 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) keep ADD unless a
   three-operand form is required.  */
13227 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13228 return regno0 != regno1;
13230 regno2 = true_regnum (operands[2]);
13232 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13233 if (regno0 != regno1 && regno0 != regno2)
13237 int dist_define, dist_use;
13238 dist_define = distance_non_agu_define (regno1, regno2, insn);
13239 if (dist_define <= 0)
13242 /* If this insn has both backward non-agu dependence and forward
13243 agu dependence, the one with short distance take effect. */
13244 dist_use = distance_agu_use (regno0, insn);
/* IX86_LEA_PRIORITY biases the comparison in LEA's favor.  */
13246 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13253 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive over PARALLELs on both sides: match the SET destination of
   SET_BODY against the shift-count operand of USE_BODY.  */
13257 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13263 /* Retrieve destination of SET_BODY. */
13264 switch (GET_CODE (set_body))
13267 set_dest = SET_DEST (set_body);
13268 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse into each element.  */
13272 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13273 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13281 /* Retrieve shift count of USE_BODY. */
13282 switch (GET_CODE (use_body))
13285 shift_rtx = XEXP (use_body, 1);
13288 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13289 if (ix86_dep_by_shift_count_body (set_body,
13290 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes carry a count operand.  */
13298 && (GET_CODE (shift_rtx) == ASHIFT
13299 || GET_CODE (shift_rtx) == LSHIFTRT
13300 || GET_CODE (shift_rtx) == ASHIFTRT
13301 || GET_CODE (shift_rtx) == ROTATE
13302 || GET_CODE (shift_rtx) == ROTATERT))
13304 rtx shift_count = XEXP (shift_rtx, 1);
13306 /* Return true if shift count is dest of SET_BODY. */
13307 if (REG_P (shift_count)
13308 && true_regnum (set_dest) == true_regnum (shift_count))
13315 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the two insn PATTERNs via the _body helper.  */
13319 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13321 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13322 PATTERN (use_insn));
13325 /* Return TRUE or FALSE depending on whether the unary operator meets the
13326 appropriate constraints. */
13329 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13330 enum machine_mode mode ATTRIBUTE_UNUSED,
13331 rtx operands[2] ATTRIBUTE_UNUSED)
13333 /* If one of operands is memory, source and destination must match. */
13334 if ((MEM_P (operands[0])
13335 || MEM_P (operands[1]))
13336 && ! rtx_equal_p (operands[0], operands[1]))
13341 /* Post-reload splitter for converting an SF or DFmode value in an
13342 SSE register into an unsigned SImode. */
13345 ix86_split_convert_uns_si_sse (rtx operands[])
13347 enum machine_mode vecmode;
13348 rtx value, large, zero_or_two31, input, two31, x;
13350 large = operands[1];
13351 zero_or_two31 = operands[2];
13352 input = operands[3];
13353 two31 = operands[4];
13354 vecmode = GET_MODE (large);
13355 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13357 /* Load up the value into the low element. We must ensure that the other
13358 elements are valid floats -- zero is the easiest such value. */
13361 if (vecmode == V4SFmode)
13362 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13364 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in an SSE register: clear then move the scalar in.  */
13368 input = gen_rtx_REG (vecmode, REGNO (input));
13369 emit_move_insn (value, CONST0_RTX (vecmode));
13370 if (vecmode == V4SFmode)
13371 emit_insn (gen_sse_movss (value, value, input));
13373 emit_insn (gen_sse2_movsd (value, value, input));
13376 emit_move_insn (large, two31);
13377 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) ? all-ones : 0, used as a mask below.  */
13379 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13380 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13382 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13383 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* Subtract 2^31 only when the value is in the "large" range.  */
13385 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13386 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into the 0x80000000 sign-bit correction.  */
13388 large = gen_rtx_REG (V4SImode, REGNO (large));
13389 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13391 x = gen_rtx_REG (V4SImode, REGNO (value));
13392 if (vecmode == V4SFmode)
13393 emit_insn (gen_sse2_cvttps2dq (x, value));
13395 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for the large range.  */
13398 emit_insn (gen_xorv4si3 (value, value, large));
13401 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13402 Expects the 64-bit DImode to be supplied in a pair of integral
13403 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13404 -mfpmath=sse, !optimize_size only. */
13407 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13409 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13410 rtx int_xmm, fp_xmm;
13411 rtx biases, exponents;
/* Move the DImode pair into the low half of an XMM register, by the
   cheapest route the target allows.  */
13414 int_xmm = gen_reg_rtx (V4SImode);
13415 if (TARGET_INTER_UNIT_MOVES)
13416 emit_insn (gen_movdi_to_sse (int_xmm, input));
13417 else if (TARGET_SSE_SPLIT_REGS)
13419 emit_clobber (int_xmm);
13420 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13424 x = gen_reg_rtx (V2DImode);
13425 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13426 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two DF exponent words used below.  */
13429 x = gen_rtx_CONST_VECTOR (V4SImode,
13430 gen_rtvec (4, GEN_INT (0x43300000UL),
13431 GEN_INT (0x45300000UL),
13432 const0_rtx, const0_rtx));
13433 exponents = validize_mem (force_const_mem (V4SImode, x));
13435 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13436 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13438 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13439 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13440 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13441 (0x1.0p84 + double(fp_value_hi_xmm)).
13442 Note these exponents differ by 32. */
13444 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13446 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13447 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13448 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13449 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13450 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13451 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13452 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13453 biases = validize_mem (force_const_mem (V2DFmode, biases));
13454 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13456 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does it in one insn; otherwise unpack + add.  */
13458 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13461 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13462 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13463 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract element 0 as the scalar DF result.  */
13466 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13469 /* Not used, but eases macroization of patterns. */
/* Stub: this SI->XF combination is never expanded via SSE.  */
13471 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13472 rtx input ATTRIBUTE_UNUSED)
13474 gcc_unreachable ();
13477 /* Convert an unsigned SImode value into a DFmode. Only currently used
13478 for SSE, but applicable anywhere. */
13481 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13483 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2^31 so it fits in signed SImode...  */
13486 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13487 NULL, 1, OPTAB_DIRECT);
13489 fp = gen_reg_rtx (DFmode);
13490 emit_insn (gen_floatsidf2 (fp, x));
/* ...then add 2^31 back in DFmode, which represents it exactly.  */
13492 real_ldexp (&TWO31r, &dconst1, 31);
13493 x = const_double_from_real_value (TWO31r, DFmode);
13495 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13497 emit_move_insn (target, x);
13500 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13501 32-bit mode; otherwise we have a direct convert instruction. */
13504 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13506 REAL_VALUE_TYPE TWO32r;
13507 rtx fp_lo, fp_hi, x;
13509 fp_lo = gen_reg_rtx (DFmode);
13510 fp_hi = gen_reg_rtx (DFmode);
/* High word converts signed; scale it by 2^32.  */
13512 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13514 real_ldexp (&TWO32r, &dconst1, 32);
13515 x = const_double_from_real_value (TWO32r, DFmode);
13516 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word converts as unsigned, then combine the halves.  */
13518 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13520 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13523 emit_move_insn (target, x);
13526 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13527 For x86_32, -mfpmath=sse, !optimize_size only. */
13529 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13531 REAL_VALUE_TYPE ONE16r;
13532 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves so each converts exactly
   via the signed SI->SF path, then recombine as hi * 2^16 + lo.  */
13534 real_ldexp (&ONE16r, &dconst1, 16);
13535 x = const_double_from_real_value (ONE16r, SFmode);
13536 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13537 NULL, 0, OPTAB_DIRECT);
13538 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13539 NULL, 0, OPTAB_DIRECT);
13540 fp_hi = gen_reg_rtx (SFmode);
13541 fp_lo = gen_reg_rtx (SFmode);
13542 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13543 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13544 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13546 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may not have used TARGET; copy if needed.  */
13548 if (!rtx_equal_p (target, fp_hi))
13549 emit_move_insn (target, fp_hi);
13552 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
13553 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR of MODE whose elements are VALUE (all of them
   when VECT, else only element 0 with the rest zero).  NOTE(review):
   case labels between the branches are elided in this extraction.  */
13557 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13564 v = gen_rtvec (4, value, value, value, value);
13565 return gen_rtx_CONST_VECTOR (V4SImode, v);
13569 v = gen_rtvec (2, value, value);
13570 return gen_rtx_CONST_VECTOR (V2DImode, v);
13574 v = gen_rtvec (4, value, value, value, value);
13576 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13577 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13578 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13582 v = gen_rtvec (2, value, value);
13584 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13585 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13588 gcc_unreachable ();
13592 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13593 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13594 for an SSE register. If VECT is true, then replicate the mask for
13595 all elements of the vector register. If INVERT is true, then create
13596 a mask excluding the sign bit. */
13599 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13601 enum machine_mode vec_mode, imode;
13602 HOST_WIDE_INT hi, lo;
13607 /* Find the sign bit, sign extended to 2*HWI. */
/* SF/SI: bit 31.  */
13613 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13614 lo = 0x80000000, hi = lo < 0;
/* DF/DI: bit 63, which may not fit in one HOST_WIDE_INT.  */
13620 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13621 if (HOST_BITS_PER_WIDE_INT >= 64)
13622 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13624 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/extended: no vector mode; build the mask in scalar form.  */
13629 vec_mode = VOIDmode;
13630 if (HOST_BITS_PER_WIDE_INT >= 64)
13633 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13640 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to mask everything except the sign bit.  */
13644 lo = ~lo, hi = ~hi;
13650 mask = immed_double_const (lo, hi, imode);
13652 vec = gen_rtvec (2, v, mask);
13653 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13654 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13661 gcc_unreachable ();
13665 lo = ~lo, hi = ~hi;
13667 /* Force this value into the low part of a fp vector constant. */
13668 mask = immed_double_const (lo, hi, imode);
13669 mask = gen_lowpart (mode, mask);
13671 if (vec_mode == VOIDmode)
13672 return force_reg (mode, mask);
13674 v = ix86_build_const_vector (mode, vect, mask);
13675 return force_reg (vec_mode, v);
13678 /* Generate code for floating point ABS or NEG. */
/* Emit RTL for FP ABS/NEG in MODE.  With SSE this is a bitwise mask op
   (XOR for NEG, AND with inverted mask for ABS); otherwise a unary rtx
   plus USE/CLOBBER bookkeeping.  (Listing elides some original lines.)  */
13681 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13684 rtx mask, set, use, clob, dst, src;
13685 bool use_sse = false;
13686 bool vector_mode = VECTOR_MODE_P (mode);
13687 enum machine_mode elt_mode = mode;
/* For vector modes, mask generation works on the element mode.  */
13691 elt_mode = GET_MODE_INNER (mode);
13694 else if (mode == TFmode)
13696 else if (TARGET_SSE_MATH)
13697 use_sse = SSE_FLOAT_MODE_P (mode);
13699 /* NEG and ABS performed with SSE use bitwise mask operations.
13700 Create the appropriate mask now. */
/* ABS needs the inverted mask (sign bit cleared); NEG the plain one.  */
13702 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
/* SSE path: NEG = src XOR signmask, ABS = src AND ~signmask.  */
13711 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13712 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: plain unary (NEG/ABS) rtx.  */
13717 set = gen_rtx_fmt_e (code, mode, src);
13718 set = gen_rtx_SET (VOIDmode, dst, set);
/* Keep the mask live and record the flags clobber in one PARALLEL.  */
13721 use = gen_rtx_USE (VOIDmode, mask);
13722 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13723 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13724 gen_rtvec (3, set, use, clob)));
13731 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* Expand copysign(op0, op1) into DEST.  A constant magnitude operand gets
   the cheaper *_const pattern (one mask); a variable one needs both the
   sign mask and its complement.  (Listing elides some original lines.)  */
13734 ix86_expand_copysign (rtx operands[])
13736 enum machine_mode mode;
13737 rtx dest, op0, op1, mask, nmask;
13739 dest = operands[0];
13743 mode = GET_MODE (dest);
13745 if (GET_CODE (op0) == CONST_DOUBLE)
13747 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Only |op0| matters; strip a negative sign from the constant.  */
13749 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13750 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13752 if (mode == SFmode || mode == DFmode)
13754 enum machine_mode vmode;
13756 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13758 if (op0 == CONST0_RTX (mode))
13759 op0 = CONST0_RTX (vmode);
/* Widen the scalar constant into a vector with zero upper elements.  */
13764 if (mode == SFmode)
13765 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13766 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13768 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13770 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13773 else if (op0 != CONST0_RTX (mode))
13774 op0 = force_reg (mode, op0);
13776 mask = ix86_build_signbit_mask (mode, 0, 0);
13778 if (mode == SFmode)
13779 copysign_insn = gen_copysignsf3_const;
13780 else if (mode == DFmode)
13781 copysign_insn = gen_copysigndf3_const;
13783 copysign_insn = gen_copysigntf3_const;
13785 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude path: needs both mask (sign bit) and nmask (~sign).  */
13789 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13791 nmask = ix86_build_signbit_mask (mode, 0, 1);
13792 mask = ix86_build_signbit_mask (mode, 0, 0);
13794 if (mode == SFmode)
13795 copysign_insn = gen_copysignsf3_var;
13796 else if (mode == DFmode)
13797 copysign_insn = gen_copysigndf3_var;
13799 copysign_insn = gen_copysigntf3_var;
13801 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13805 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13806 be a constant, and so has already been expanded into a vector constant. */
/* Split the const-magnitude copysign pattern: dest = (dest & mask) | op0,
   all done in the vector mode of MASK.  (Listing elides some lines.)  */
13809 ix86_split_copysign_const (rtx operands[])
13811 enum machine_mode mode, vmode;
13812 rtx dest, op0, op1, mask, x;
13814 dest = operands[0];
13817 mask = operands[3];
13819 mode = GET_MODE (dest);
13820 vmode = GET_MODE (mask);
/* Work on DEST reinterpreted in the mask's vector mode.  */
13822 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13823 x = gen_rtx_AND (vmode, dest, mask);
13824 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ORing in zero would be a no-op, so skip it.  */
13826 if (op0 != CONST0_RTX (vmode))
13828 x = gen_rtx_IOR (vmode, dest, op0);
13829 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13833 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13834 so we have to do two masks. */
/* Split the variable-magnitude copysign pattern; the register-allocation
   alternative (which operand shares which hard reg) picks the AND
   operand order.  (Listing elides some original lines.)  */
13837 ix86_split_copysign_var (rtx operands[])
13839 enum machine_mode mode, vmode;
13840 rtx dest, scratch, op0, op1, mask, nmask, x;
13842 dest = operands[0];
13843 scratch = operands[1];
13846 nmask = operands[4];
13847 mask = operands[5];
13849 mode = GET_MODE (dest);
13850 vmode = GET_MODE (mask);
13852 if (rtx_equal_p (op0, op1))
13854 /* Shouldn't happen often (it's useless, obviously), but when it does
13855 we'd generate incorrect code if we continue below. */
13856 emit_move_insn (dest, op0);
13860 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13862 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask (sign bits of op1).  */
13864 x = gen_rtx_AND (vmode, scratch, mask);
13865 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~mask & op0 (magnitude of op0).  */
13868 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13869 x = gen_rtx_NOT (vmode, dest);
13870 x = gen_rtx_AND (vmode, x, op0);
13871 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13875 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13877 x = gen_rtx_AND (vmode, scratch, mask);
13879 else /* alternative 2,4 */
13881 gcc_assert (REGNO (mask) == REGNO (scratch));
13882 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13883 x = gen_rtx_AND (vmode, scratch, op1);
13885 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13887 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13889 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13890 x = gen_rtx_AND (vmode, dest, nmask);
13892 else /* alternative 3,4 */
13894 gcc_assert (REGNO (nmask) == REGNO (dest));
13896 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13897 x = gen_rtx_AND (vmode, dest, op0);
13899 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine magnitude and sign: dest |= scratch.  */
13902 x = gen_rtx_IOR (vmode, dest, scratch);
13903 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13906 /* Return TRUE or FALSE depending on whether the first SET in INSN
13907 has source and destination with matching CC modes, and that the
13908 CC mode is at least as constrained as REQ_MODE. */
/* Check that INSN's first SET is a COMPARE whose CC mode is at least as
   constrained as REQ_MODE.  (Listing elides the per-mode case bodies.)  */
13911 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13914 enum machine_mode set_mode;
13916 set = PATTERN (insn);
13917 if (GET_CODE (set) == PARALLEL)
13918 set = XVECEXP (set, 0, 0);
13919 gcc_assert (GET_CODE (set) == SET);
13920 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13922 set_mode = GET_MODE (SET_DEST (set));
/* CCmode against zero is acceptable for the CCNO-style requirements.  */
13926 if (req_mode != CCNOmode
13927 && (req_mode != CCmode
13928 || XEXP (SET_SRC (set), 1) != const0_rtx))
13932 if (req_mode == CCGCmode)
13936 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13940 if (req_mode == CCZmode)
13951 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode.  */
13954 return (GET_MODE (SET_SRC (set)) == set_mode);
13957 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emit FLAGS_REG = compare (op0, op1) in the CC mode chosen for CODE and
   return the comparison rtx for the flags consumer.  */
13960 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13962 enum machine_mode cmpmode;
13965 cmpmode = SELECT_CC_MODE (code, op0, op1);
13966 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13968 /* This is very simple, but making the interface the same as in the
13969 FP case makes the rest of the code easier. */
13970 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13971 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13973 /* Return the test that should be put into the flags user, i.e.
13974 the bcc, scc, or cmov instruction. */
13975 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13978 /* Figure out whether to use ordered or unordered fp comparisons.
13979 Return the appropriate mode to use. */
/* Choose the CC mode for an FP compare; CODE is currently unused.  */
13982 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13984 /* ??? In order to make all comparisons reversible, we do all comparisons
13985 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13986 all forms trapping and nontrapping comparisons, we can make inequality
13987 comparisons trapping again, since it results in better code when using
13988 FCOM based compares. */
13989 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the minimal CC mode needed to test CODE on op0/op1; narrower CC
   modes let later passes reuse flag-setting insns.  (Listing elides the
   returned mode on several branches.)  */
13993 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13995 enum machine_mode mode = GET_MODE (op0);
13997 if (SCALAR_FLOAT_MODE_P (mode))
13999 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14000 return ix86_fp_compare_mode (code);
14005 /* Only zero flag is needed. */
14006 case EQ: /* ZF=0 */
14007 case NE: /* ZF!=0 */
14009 /* Codes needing carry flag. */
14010 case GEU: /* CF=0 */
14011 case LTU: /* CF=1 */
14012 /* Detect overflow checks. They need just the carry flag. */
14013 if (GET_CODE (op0) == PLUS
14014 && rtx_equal_p (op1, XEXP (op0, 0)))
14018 case GTU: /* CF=0 & ZF=0 */
14019 case LEU: /* CF=1 | ZF=1 */
14020 /* Detect overflow checks. They need just the carry flag. */
14021 if (GET_CODE (op0) == MINUS
14022 && rtx_equal_p (op1, XEXP (op0, 0)))
14026 /* Codes possibly doable only with sign flag when
14027 comparing against zero. */
14028 case GE: /* SF=OF or SF=0 */
14029 case LT: /* SF<>OF or SF=1 */
14030 if (op1 == const0_rtx)
14033 /* For other cases Carry flag is not required. */
14035 /* Codes doable only with sign flag when comparing
14036 against zero, but we miss jump instruction for it
14037 so we need to use relational tests against overflow
14038 that thus needs to be zero. */
14039 case GT: /* ZF=0 & SF=OF */
14040 case LE: /* ZF=1 | SF<>OF */
14041 if (op1 == const0_rtx)
14045 /* strcmp pattern do (use flags) and combine may ask us for proper
14050 gcc_unreachable ();
14054 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body elided in this listing — presumably stores the two
   fixed CC register numbers through *p1/*p2; confirm in the full source.  */
14057 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14064 /* If two condition code modes are compatible, return a condition code
14065 mode which is compatible with both. Otherwise, return
/* Hook: combine two CC modes into one compatible with both, or fail.
   (Listing elides the per-mode switch bodies.)  */
14068 static enum machine_mode
14069 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14074 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC mix: the elided line returns their common mode.  */
14077 if ((m1 == CCGCmode && m2 == CCGOCmode)
14078 || (m1 == CCGOCmode && m2 == CCGCmode))
14084 gcc_unreachable ();
14114 /* These are only compatible with themselves, which we already
14120 /* Split comparison code CODE into comparisons we can do using branch
14121 instructions. BYPASS_CODE is comparison code for branch that will
14122 branch around FIRST_CODE and SECOND_CODE. If some of branches
14123 is not required, set value to UNKNOWN.
14124 We never require more than two branches. */
/* Decompose FP comparison CODE into at most two branch codes plus an
   optional UNORDERED bypass branch; unused slots are left UNKNOWN.  */
14127 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14128 enum rtx_code *first_code,
14129 enum rtx_code *second_code)
14131 *first_code = code;
14132 *bypass_code = UNKNOWN;
14133 *second_code = UNKNOWN;
14135 /* The fcomi comparison sets flags as follows:
/* Codes that map directly onto one fcomi-flag test.  */
14145 case GT: /* GTU - CF=0 & ZF=0 */
14146 case GE: /* GEU - CF=0 */
14147 case ORDERED: /* PF=0 */
14148 case UNORDERED: /* PF=1 */
14149 case UNEQ: /* EQ - ZF=1 */
14150 case UNLT: /* LTU - CF=1 */
14151 case UNLE: /* LEU - CF=1 | ZF=1 */
14152 case LTGT: /* EQ - ZF=0 */
/* Signalling codes: test the unordered-tolerant form, but branch around
   it (or add a second branch) on UNORDERED for IEEE correctness.  */
14154 case LT: /* LTU - CF=1 - fails on unordered */
14155 *first_code = UNLT;
14156 *bypass_code = UNORDERED;
14158 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14159 *first_code = UNLE;
14160 *bypass_code = UNORDERED;
14162 case EQ: /* EQ - ZF=1 - fails on unordered */
14163 *first_code = UNEQ;
14164 *bypass_code = UNORDERED;
14166 case NE: /* NE - ZF=0 - fails on unordered */
14167 *first_code = LTGT;
14168 *second_code = UNORDERED;
14170 case UNGE: /* GEU - CF=0 - fails on unordered */
14172 *second_code = UNORDERED;
14174 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14176 *second_code = UNORDERED;
14179 gcc_unreachable ();
/* Without strict IEEE semantics a single branch suffices.  */
14181 if (!TARGET_IEEE_FP)
14183 *second_code = UNKNOWN;
14184 *bypass_code = UNKNOWN;
14188 /* Return cost of comparison done fcom + arithmetics operations on AX.
14189 All following functions do use number of instructions as a cost metrics.
14190 In future this should be tweaked to compute bytes for optimize_size and
14191 take into account performance of various instructions on various CPUs. */
/* Cost (in insns) of expanding CODE via fcom + AX arithmetic.
   (The per-code switch body is elided in this listing.)  */
14193 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14195 if (!TARGET_IEEE_FP)
14197 /* The cost of code output by ix86_expand_fp_compare. */
14221 gcc_unreachable ();
14225 /* Return cost of comparison done using fcomi operation.
14226 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Cost of doing CODE with fcomi: 2 insns plus 1 if an extra branch or
   bypass is needed.  (The unsupported-target early return is elided.)  */
14228 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14230 enum rtx_code bypass_code, first_code, second_code;
14231 /* Return arbitrarily high cost when instruction is not supported - this
14232 prevents gcc from using it. */
14235 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14236 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14239 /* Return cost of comparison done using sahf operation.
14240 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Cost of doing CODE with fnstsw+sahf: 3 insns plus 1 for an extra
   branch/bypass, mirroring the fcomi variant above.  */
14242 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14244 enum rtx_code bypass_code, first_code, second_code;
14245 /* Return arbitrarily high cost when instruction is not preferred - this
14246 prevents gcc from using it. */
14247 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14249 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14250 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14253 /* Compute cost of the comparison done using any method.
14254 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum over the three FP-compare strategies (arithmetics, sahf, fcomi).  */
14256 ix86_fp_comparison_cost (enum rtx_code code)
14258 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14261 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14262 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14264 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14265 if (min > sahf_cost)
14267 if (min > fcomi_cost)
14272 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE in either operand
   order (the swapped order may be chosen by the preparation code).  */
14276 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14278 enum rtx_code swapped_code = swap_condition (code);
14280 return ((ix86_fp_comparison_cost (code)
14281 == ix86_fp_comparison_fcomi_cost (code))
14282 || (ix86_fp_comparison_cost (swapped_code)
14283 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14286 /* Swap, force into registers, or otherwise massage the two operands
14287 to a fp comparison. The operands are updated in place; the new
14288 comparison code is returned. */
/* Massage *pop0/*pop1 (forcing to registers/memory, possibly swapping) so
   the FP compare can be emitted; returns the possibly-swapped CODE.
   (Listing elides some original lines.)  */
14290 static enum rtx_code
14291 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14293 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14294 rtx op0 = *pop0, op1 = *pop1;
14295 enum machine_mode op_mode = GET_MODE (op0);
14296 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14298 /* All of the unordered compare instructions only work on registers.
14299 The same is true of the fcomi compare instructions. The XFmode
14300 compare instructions require registers except when comparing
14301 against zero or when converting operand 1 from fixed point to
14305 && (fpcmp_mode == CCFPUmode
14306 || (op_mode == XFmode
14307 && ! (standard_80387_constant_p (op0) == 1
14308 || standard_80387_constant_p (op1) == 1)
14309 && GET_CODE (op1) != FLOAT)
14310 || ix86_use_fcomi_compare (code)))
14312 op0 = force_reg (op_mode, op0);
14313 op1 = force_reg (op_mode, op1);
14317 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14318 things around if they appear profitable, otherwise force op0
14319 into a register. */
14321 if (standard_80387_constant_p (op0) == 0
14323 && ! (standard_80387_constant_p (op1) == 0
14327 tmp = op0, op0 = op1, op1 = tmp;
14328 code = swap_condition (code);
14332 op0 = force_reg (op_mode, op0);
14334 if (CONSTANT_P (op1))
14336 int tmp = standard_80387_constant_p (op1);
/* Non-standard 387 constant: spill it to the constant pool.  */
14338 op1 = validize_mem (force_const_mem (op_mode, op1));
14342 op1 = force_reg (op_mode, op1);
14345 op1 = force_reg (op_mode, op1);
14349 /* Try to rearrange the comparison to make it cheaper. */
14350 if (ix86_fp_comparison_cost (code)
14351 > ix86_fp_comparison_cost (swap_condition (code))
14352 && (REG_P (op1) || can_create_pseudo_p ()))
14355 tmp = op0, op0 = op1, op1 = tmp;
14356 code = swap_condition (code);
14358 op0 = force_reg (op_mode, op0);
14366 /* Convert comparison codes we use to represent FP comparison to integer
14367 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): body elided in this listing — maps an FP comparison code
   to the integer condition tested on the flags (per the comment above).  */
14371 ix86_fp_compare_code_to_integer (enum rtx_code code)
14400 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Emit an FP compare of op0/op1 and return the flags-user rtx.  Either
   fcomi/sahf style (flags directly) or fnstsw + bit twiddling on AH.
   *SECOND_TEST/*BYPASS_TEST receive extra tests when two branches are
   needed.  (Listing elides some original lines.)  */
14403 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14404 rtx *second_test, rtx *bypass_test)
14406 enum machine_mode fpcmp_mode, intcmp_mode;
14408 int cost = ix86_fp_comparison_cost (code);
14409 enum rtx_code bypass_code, first_code, second_code;
14411 fpcmp_mode = ix86_fp_compare_mode (code);
14412 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14415 *second_test = NULL_RTX;
14417 *bypass_test = NULL_RTX;
14419 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14421 /* Do fcomi/sahf based test when profitable. */
14422 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14423 && (bypass_code == UNKNOWN || bypass_test)
14424 && (second_code == UNKNOWN || second_test))
14426 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14427 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14433 gcc_assert (TARGET_SAHF);
14436 scratch = gen_reg_rtx (HImode);
14437 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14439 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14442 /* The FP codes work out to act like unsigned. */
14443 intcmp_mode = fpcmp_mode;
14445 if (bypass_code != UNKNOWN)
14446 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14447 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14449 if (second_code != UNKNOWN)
14450 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14451 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14456 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14457 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14458 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14460 scratch = gen_reg_rtx (HImode);
14461 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14463 /* In the unordered case, we have to check C2 for NaN's, which
14464 doesn't happen to work out to anything nice combination-wise.
14465 So do some bit twiddling on the value we've got in AH to come
14466 up with an appropriate set of condition codes. */
14468 intcmp_mode = CCNOmode;
14473 if (code == GT || !TARGET_IEEE_FP)
14475 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)))
14480 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14481 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14482 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14483 intcmp_mode = CCmode;
14489 if (code == LT && TARGET_IEEE_FP)
14491 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14492 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14493 intcmp_mode = CCmode;
14498 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14504 if (code == GE || !TARGET_IEEE_FP)
14506 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14512 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14519 if (code == LE && TARGET_IEEE_FP)
14521 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14522 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14523 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14524 intcmp_mode = CCmode;
14529 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14535 if (code == EQ && TARGET_IEEE_FP)
14537 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14538 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14539 intcmp_mode = CCmode;
14544 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14551 if (code == NE && TARGET_IEEE_FP)
14553 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14554 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14560 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14575 gcc_unreachable ();
14579 /* Return the test that should be put into the flags user, i.e.
14580 the bcc, scc, or cmov instruction. */
14581 return gen_rtx_fmt_ee (code, VOIDmode,
14582 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Dispatch a compare of ix86_compare_op0/op1: reuse an existing CC value,
   expand an FP compare, or expand an integer compare.  */
14587 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14590 op0 = ix86_compare_op0;
14591 op1 = ix86_compare_op1;
14594 *second_test = NULL_RTX;
14596 *bypass_test = NULL_RTX;
/* A CC-mode operand means the flags were already set; just wrap them.  */
14598 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14599 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14601 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14603 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14604 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14605 second_test, bypass_test);
14608 ret = ix86_expand_int_compare (code, op0, op1);
14613 /* Return true if the CODE will result in nontrivial jump sequence. */
/* True when branching on FP CODE needs more than one jump (a second test
   or an UNORDERED bypass branch).  */
14615 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14617 enum rtx_code bypass_code, first_code, second_code;
14620 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14621 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on ix86_compare_op0/op1 to LABEL, switching
   on the operands' mode: simple integer modes, FP modes, and the
   double-word (DI/TI) split path.  (Listing elides some original lines.)  */
14625 ix86_expand_branch (enum rtx_code code, rtx label)
14629 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare + one conditional jump.  */
14635 tmp = ix86_expand_compare (code, NULL, NULL);
14636 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14637 gen_rtx_LABEL_REF (VOIDmode, label),
14639 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14648 enum rtx_code bypass_code, first_code, second_code;
14650 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14651 &ix86_compare_op1);
14653 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14655 /* Check whether we will use the natural sequence with one jump. If
14656 so, we can expand jump early. Otherwise delay expansion by
14657 creating compound insn to not confuse optimizers. */
14658 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14660 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14661 gen_rtx_LABEL_REF (VOIDmode, label),
14662 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-jump FP case: emit one compound insn; it is split later.  */
14666 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14667 ix86_compare_op0, ix86_compare_op1);
14668 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14669 gen_rtx_LABEL_REF (VOIDmode, label),
14671 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14673 use_fcomi = ix86_use_fcomi_compare (code);
14674 vec = rtvec_alloc (3 + !use_fcomi);
14675 RTVEC_ELT (vec, 0) = tmp;
14677 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14679 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
/* Without fcomi a scratch HImode reg is needed for fnstsw.  */
14682 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14684 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14693 /* Expand DImode branch into multiple compare+branch. */
14695 rtx lo[2], hi[2], label2;
14696 enum rtx_code code1, code2, code3;
14697 enum machine_mode submode;
/* Canonicalize: constant goes second.  */
14699 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14701 tmp = ix86_compare_op0;
14702 ix86_compare_op0 = ix86_compare_op1;
14703 ix86_compare_op1 = tmp;
14704 code = swap_condition (code);
14706 if (GET_MODE (ix86_compare_op0) == DImode)
14708 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14709 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14714 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14715 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14719 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14720 avoid two branches. This costs one extra insn, so disable when
14721 optimizing for size. */
14723 if ((code == EQ || code == NE)
14724 && (!optimize_insn_for_size_p ()
14725 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14730 if (hi[1] != const0_rtx)
14731 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14732 NULL_RTX, 0, OPTAB_WIDEN);
14735 if (lo[1] != const0_rtx)
14736 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14737 NULL_RTX, 0, OPTAB_WIDEN);
14739 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14740 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the word-sized OR-of-XORs against zero.  */
14742 ix86_compare_op0 = tmp;
14743 ix86_compare_op1 = const0_rtx;
14744 ix86_expand_branch (code, label);
14748 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14749 op1 is a constant and the low word is zero, then we can just
14750 examine the high word. Similarly for low word -1 and
14751 less-or-equal-than or greater-than. */
14753 if (CONST_INT_P (hi[1]))
14756 case LT: case LTU: case GE: case GEU:
14757 if (lo[1] == const0_rtx)
14759 ix86_compare_op0 = hi[0];
14760 ix86_compare_op1 = hi[1];
14761 ix86_expand_branch (code, label);
14765 case LE: case LEU: case GT: case GTU:
14766 if (lo[1] == constm1_rtx)
14768 ix86_compare_op0 = hi[0];
14769 ix86_compare_op1 = hi[1];
14770 ix86_expand_branch (code, label);
14778 /* Otherwise, we need two or three jumps. */
14780 label2 = gen_label_rtx ();
14783 code2 = swap_condition (code);
14784 code3 = unsigned_condition (code);
14788 case LT: case GT: case LTU: case GTU:
14791 case LE: code1 = LT; code2 = GT; break;
14792 case GE: code1 = GT; code2 = LT; break;
14793 case LEU: code1 = LTU; code2 = GTU; break;
14794 case GEU: code1 = GTU; code2 = LTU; break;
14796 case EQ: code1 = UNKNOWN; code2 = NE; break;
14797 case NE: code2 = UNKNOWN; break;
14800 gcc_unreachable ();
14805 * if (hi(a) < hi(b)) goto true;
14806 * if (hi(a) > hi(b)) goto false;
14807 * if (lo(a) < lo(b)) goto true;
/* High words decide when unequal; low words break the tie.  */
14811 ix86_compare_op0 = hi[0];
14812 ix86_compare_op1 = hi[1];
14814 if (code1 != UNKNOWN)
14815 ix86_expand_branch (code1, label);
14816 if (code2 != UNKNOWN)
14817 ix86_expand_branch (code2, label2);
14819 ix86_compare_op0 = lo[0];
14820 ix86_compare_op1 = lo[1];
14821 ix86_expand_branch (code3, label);
14823 if (code2 != UNKNOWN)
14824 emit_label (label2);
14829 /* If we have already emitted a compare insn, go straight to simple.
14830 ix86_expand_compare won't emit anything if ix86_compare_emitted
14832 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14837 /* Split branch based on floating point condition. */
/* Split an FP branch into its final jump sequence: optional bypass jump
   (around the main test), the main jump, and an optional second jump;
   REG_BR_PROB notes are distributed across them.  */
14839 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14840 rtx target1, rtx target2, rtx tmp, rtx pushed)
14842 rtx second, bypass;
14843 rtx label = NULL_RTX;
14845 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is target2 (pc_rtx).  */
14848 if (target2 != pc_rtx)
14851 code = reverse_condition_maybe_unordered (code);
14856 condition = ix86_expand_fp_compare (code, op1, op2,
14857 tmp, &second, &bypass);
14859 /* Remove pushed operand from stack. */
14861 ix86_free_from_memory (GET_MODE (pushed));
14863 if (split_branch_probability >= 0)
14865 /* Distribute the probabilities across the jumps.
14866 Assume the BYPASS and SECOND to be always test
14868 probability = split_branch_probability;
14870 /* Value of 1 is low enough to make no need for probability
14871 to be updated. Later we may run some experiments and see
14872 if unordered values are more frequent in practice. */
14874 bypass_probability = 1;
14876 second_probability = 1;
/* The bypass jump skips over the main conditional jump.  */
14878 if (bypass != NULL_RTX)
14880 label = gen_label_rtx ();
14881 i = emit_jump_insn (gen_rtx_SET
14883 gen_rtx_IF_THEN_ELSE (VOIDmode,
14885 gen_rtx_LABEL_REF (VOIDmode,
14888 if (bypass_probability >= 0)
14890 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14891 GEN_INT (bypass_probability),
/* The main conditional jump.  */
14894 i = emit_jump_insn (gen_rtx_SET
14896 gen_rtx_IF_THEN_ELSE (VOIDmode,
14897 condition, target1, target2)));
14898 if (probability >= 0)
14900 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14901 GEN_INT (probability),
/* The optional second conditional jump.  */
14903 if (second != NULL_RTX)
14905 i = emit_jump_insn (gen_rtx_SET
14907 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14909 if (second_probability >= 0)
14911 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14912 GEN_INT (second_probability),
14915 if (label != NULL_RTX)
14916 emit_label (label);
/* Expand a setcc of CODE into QImode DEST; returns 0 (FAIL) for
   double-word modes, 1 (DONE) otherwise.  Extra FP tests are merged with
   AND/OR of a second QImode result.  */
14920 ix86_expand_setcc (enum rtx_code code, rtx dest)
14922 rtx ret, tmp, tmpreg, equiv;
14923 rtx second_test, bypass_test;
14925 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14926 return 0; /* FAIL */
14928 gcc_assert (GET_MODE (dest) == QImode);
14930 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14931 PUT_MODE (ret, QImode);
14936 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14937 if (bypass_test || second_test)
14939 rtx test = second_test;
14941 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is folded in inverted (see reverse below).  */
14944 gcc_assert (!second_test);
14945 test = bypass_test;
14947 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14949 PUT_MODE (test, QImode);
14950 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
/* AND for the (inverted) bypass test, OR for the second test.  */
14953 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14955 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14958 /* Attach a REG_EQUAL note describing the comparison result. */
14959 if (ix86_compare_op0 && ix86_compare_op1)
14961 equiv = simplify_gen_relational (code, QImode,
14962 GET_MODE (ix86_compare_op0),
14963 ix86_compare_op0, ix86_compare_op1);
14964 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14967 return 1; /* DONE */
14970 /* Expand comparison setting or clearing carry flag. Return true when
14971 successful and set pop for the operation. */
/* Try to expand CODE(op0, op1) as a carry-flag-only (LTU/GEU) compare;
   on success store the flags-user rtx in *POP.  Integer comparisons are
   rewritten (operand swap, +/-1 adjust, sign-bit trick) into an unsigned
   form.  (Listing elides some original lines, including return values.)  */
14973 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14975 enum machine_mode mode =
14976 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14978 /* Do not handle DImode compares that go through special path. */
14979 if (mode == (TARGET_64BIT ? TImode : DImode))
14982 if (SCALAR_FLOAT_MODE_P (mode))
14984 rtx second_test = NULL, bypass_test = NULL;
14985 rtx compare_op, compare_seq;
14987 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14989 /* Shortcut: following common codes never translate
14990 into carry flag compares. */
14991 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14992 || code == ORDERED || code == UNORDERED)
14995 /* These comparisons require zero flag; swap operands so they won't. */
14996 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14997 && !TARGET_IEEE_FP)
15002 code = swap_condition (code);
15005 /* Try to expand the comparison and verify that we end up with
15006 carry flag based comparison. This fails to be true only when
15007 we decide to expand comparison using arithmetic that is not
15008 too common scenario. */
15010 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15011 &second_test, &bypass_test);
15012 compare_seq = get_insns ();
/* Extra tests mean more than the carry flag is involved — give up.  */
15015 if (second_test || bypass_test)
15018 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15019 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15020 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15022 code = GET_CODE (compare_op);
15024 if (code != LTU && code != GEU)
15027 emit_insn (compare_seq);
15032 if (!INTEGRAL_MODE_P (mode))
15041 /* Convert a==0 into (unsigned)a<1. */
15044 if (op1 != const0_rtx)
15047 code = (code == EQ ? LTU : GEU);
15050 /* Convert a>b into b<a or a>=b-1. */
15053 if (CONST_INT_P (op1))
15055 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15056 /* Bail out on overflow. We still can swap operands but that
15057 would force loading of the constant into register. */
15058 if (op1 == const0_rtx
15059 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15061 code = (code == GTU ? GEU : LTU);
15068 code = (code == GTU ? LTU : GEU);
15072 /* Convert a>=0 into (unsigned)a<0x80000000. */
15075 if (mode == DImode || op1 != const0_rtx)
15077 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15078 code = (code == LT ? GEU : LTU);
15082 if (mode == DImode || op1 != constm1_rtx)
15084 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15085 code = (code == LE ? GEU : LTU);
15091 /* Swapping operands may cause constant to appear as first operand. */
15092 if (!nonimmediate_operand (op0, VOIDmode))
15094 if (!can_create_pseudo_p ())
15096 op0 = force_reg (mode, op0);
15098 ix86_compare_op0 = op0;
15099 ix86_compare_op1 = op1;
15100 *pop = ix86_expand_compare (code, NULL, NULL);
15101 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer-mode conditional move for the movcc pattern.
   operands[0] is the destination, operands[1] the comparison rtx
   (its operands live in ix86_compare_op0/ix86_compare_op1), and
   operands[2]/operands[3] are the true/false values.
   Returns 1 when the whole operation was emitted ("DONE"), 0 on
   "FAIL" so the caller falls back to generic expansion.  */
15106 ix86_expand_int_movcc (rtx operands[])
15108 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15109 rtx compare_seq, compare_op;
15110 rtx second_test, bypass_test;
15111 enum machine_mode mode = GET_MODE (operands[0]);
15112 bool sign_bit_compare_p = false;;
15115 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15116 compare_seq = get_insns ();
15119 compare_code = GET_CODE (compare_op);
/* Compares against 0 with GE/LT (or against -1 with GT/LE) test only
   the sign bit and can later be done with a shift instead of setcc.  */
15121 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15122 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15123 sign_bit_compare_p = true;
15125 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15126 HImode insns, we'd be swallowed in word prefix ops. */
/* Fast path: both arms are integer constants, so the cmov can be
   replaced by sbb/setcc plus arithmetic on the constant difference.  */
15128 if ((mode != HImode || TARGET_FAST_PREFIX)
15129 && (mode != (TARGET_64BIT ? TImode : DImode))
15130 && CONST_INT_P (operands[2])
15131 && CONST_INT_P (operands[3]))
15133 rtx out = operands[0];
15134 HOST_WIDE_INT ct = INTVAL (operands[2]);
15135 HOST_WIDE_INT cf = INTVAL (operands[3]);
15136 HOST_WIDE_INT diff;
15139 /* Sign bit compares are better done using shifts than we do by using
15141 if (sign_bit_compare_p
15142 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15143 ix86_compare_op1, &compare_op))
15145 /* Detect overlap between destination and compare sources. */
15148 if (!sign_bit_compare_p)
15150 bool fpcmp = false;
15152 compare_code = GET_CODE (compare_op);
/* FP flag modes need their condition mapped to the integer view of
   the carry flag before we can reason about LTU/GEU below.  */
15154 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15155 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15158 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15161 /* To simplify rest of code, restrict to the GEU case. */
15162 if (compare_code == LTU)
15164 HOST_WIDE_INT tmp = ct;
15167 compare_code = reverse_condition (compare_code);
15168 code = reverse_condition (code);
15173 PUT_CODE (compare_op,
15174 reverse_condition_maybe_unordered
15175 (GET_CODE (compare_op)));
15177 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* If the destination aliases a compare operand, materialize the
   sbb result in a scratch register first.  */
15181 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15182 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15183 tmp = gen_reg_rtx (mode);
/* sbb-style: produce 0 / -1 from the carry flag.  */
15185 if (mode == DImode)
15186 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15188 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15192 if (code == GT || code == GE)
15193 code = reverse_condition (code);
15196 HOST_WIDE_INT tmp = ct;
/* emit_store_flag with -1 as "normalize" yields a 0/-1 mask.  */
15201 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15202 ix86_compare_op1, VOIDmode, 0, -1);
15215 tmp = expand_simple_binop (mode, PLUS,
15217 copy_rtx (tmp), 1, OPTAB_DIRECT);
15228 tmp = expand_simple_binop (mode, IOR,
15230 copy_rtx (tmp), 1, OPTAB_DIRECT);
15232 else if (diff == -1 && ct)
15242 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15244 tmp = expand_simple_binop (mode, PLUS,
15245 copy_rtx (tmp), GEN_INT (cf),
15246 copy_rtx (tmp), 1, OPTAB_DIRECT);
15254 * andl cf - ct, dest
/* General constant/constant case: mask the 0/-1 flag value with
   (cf - ct) and then add ct, selecting between the two constants
   without a branch.  */
15264 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15267 tmp = expand_simple_binop (mode, AND,
15269 gen_int_mode (cf - ct, mode),
15270 copy_rtx (tmp), 1, OPTAB_DIRECT);
15272 tmp = expand_simple_binop (mode, PLUS,
15273 copy_rtx (tmp), GEN_INT (ct),
15274 copy_rtx (tmp), 1, OPTAB_DIRECT);
15277 if (!rtx_equal_p (tmp, out))
15278 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15280 return 1; /* DONE */
15285 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15288 tmp = ct, ct = cf, cf = tmp;
15291 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15293 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15295 /* We may be reversing unordered compare to normal compare, that
15296 is not valid in general (we may convert non-trapping condition
15297 to trapping one), however on i386 we currently emit all
15298 comparisons unordered. */
15299 compare_code = reverse_condition_maybe_unordered (compare_code);
15300 code = reverse_condition_maybe_unordered (code);
15304 compare_code = reverse_condition (compare_code);
15305 code = reverse_condition (code);
/* Recognize compares against 0 / -1 that reduce to a sign test.  */
15309 compare_code = UNKNOWN;
15310 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15311 && CONST_INT_P (ix86_compare_op1))
15313 if (ix86_compare_op1 == const0_rtx
15314 && (code == LT || code == GE))
15315 compare_code = code;
15316 else if (ix86_compare_op1 == constm1_rtx)
15320 else if (code == GT)
15325 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15326 if (compare_code != UNKNOWN
15327 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15328 && (cf == -1 || ct == -1))
15330 /* If lea code below could be used, only optimize
15331 if it results in a 2 insn sequence. */
15333 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15334 || diff == 3 || diff == 5 || diff == 9)
15335 || (compare_code == LT && ct == -1)
15336 || (compare_code == GE && cf == -1))
15339 * notl op1 (if necessary)
15347 code = reverse_condition (code);
15350 out = emit_store_flag (out, code, ix86_compare_op0,
15351 ix86_compare_op1, VOIDmode, 0, -1);
15353 out = expand_simple_binop (mode, IOR,
15355 out, 1, OPTAB_DIRECT);
15356 if (out != operands[0])
15357 emit_move_insn (operands[0], out);
15359 return 1; /* DONE */
/* diff = ct - cf fits an lea scale/offset (1,2,3,4,5,8,9):
   compute setcc then scale with a single lea.  */
15364 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15365 || diff == 3 || diff == 5 || diff == 9)
15366 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15368 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15374 * lea cf(dest*(ct-cf)),dest
15378 * This also catches the degenerate setcc-only case.
15384 out = emit_store_flag (out, code, ix86_compare_op0,
15385 ix86_compare_op1, VOIDmode, 0, 1);
15388 /* On x86_64 the lea instruction operates on Pmode, so we need
15389 to get arithmetics done in proper mode to match. */
15391 tmp = copy_rtx (out);
15395 out1 = copy_rtx (out);
15396 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15400 tmp = gen_rtx_PLUS (mode, tmp, out1);
15406 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15409 if (!rtx_equal_p (tmp, out))
15412 out = force_operand (tmp, copy_rtx (out));
15414 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15416 if (!rtx_equal_p (out, operands[0]))
15417 emit_move_insn (operands[0], copy_rtx (out));
15419 return 1; /* DONE */
15423 * General case: Jumpful:
15424 * xorl dest,dest cmpl op1, op2
15425 * cmpl op1, op2 movl ct, dest
15426 * setcc dest jcc 1f
15427 * decl dest movl cf, dest
15428 * andl (cf-ct),dest 1:
15431 * Size 20. Size 14.
15433 * This is reasonably steep, but branch mispredict costs are
15434 * high on modern cpus, so consider failing only if optimizing
/* Branchless general case, used when cmov is unavailable (or would
   stall) and branches are expensive enough to justify the extra size.  */
15438 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15439 && BRANCH_COST (optimize_insn_for_speed_p (),
15444 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15449 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15451 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15453 /* We may be reversing unordered compare to normal compare,
15454 that is not valid in general (we may convert non-trapping
15455 condition to trapping one), however on i386 we currently
15456 emit all comparisons unordered. */
15457 code = reverse_condition_maybe_unordered (code);
15461 code = reverse_condition (code);
15462 if (compare_code != UNKNOWN)
15463 compare_code = reverse_condition (compare_code);
15467 if (compare_code != UNKNOWN)
15469 /* notl op1 (if needed)
15474 For x < 0 (resp. x <= -1) there will be no notl,
15475 so if possible swap the constants to get rid of the
15477 True/false will be -1/0 while code below (store flag
15478 followed by decrement) is 0/-1, so the constants need
15479 to be exchanged once more. */
15481 if (compare_code == GE || !cf)
15483 code = reverse_condition (code);
15488 HOST_WIDE_INT tmp = cf;
15493 out = emit_store_flag (out, code, ix86_compare_op0,
15494 ix86_compare_op1, VOIDmode, 0, -1);
15498 out = emit_store_flag (out, code, ix86_compare_op0,
15499 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc gives 0/1; decrement to 0/-1, mask with (cf - ct), add ct.  */
15501 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15502 copy_rtx (out), 1, OPTAB_DIRECT);
15505 out = expand_simple_binop (mode, AND, copy_rtx (out),
15506 gen_int_mode (cf - ct, mode),
15507 copy_rtx (out), 1, OPTAB_DIRECT);
15509 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15510 copy_rtx (out), 1, OPTAB_DIRECT);
15511 if (!rtx_equal_p (out, operands[0]))
15512 emit_move_insn (operands[0], copy_rtx (out));
15514 return 1; /* DONE */
15518 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15520 /* Try a few things more with specific constants and a variable. */
15523 rtx var, orig_out, out, tmp;
15525 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15526 return 0; /* FAIL */
15528 /* If one of the two operands is an interesting constant, load a
15529 constant with the above and mask it in with a logical operation. */
15531 if (CONST_INT_P (operands[2]))
15534 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15535 operands[3] = constm1_rtx, op = and_optab;
15536 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15537 operands[3] = const0_rtx, op = ior_optab;
15539 return 0; /* FAIL */
15541 else if (CONST_INT_P (operands[3]))
15544 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15545 operands[2] = constm1_rtx, op = and_optab;
15546 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15547 operands[2] = const0_rtx, op = ior_optab;
15549 return 0; /* FAIL */
15552 return 0; /* FAIL */
15554 orig_out = operands[0];
15555 tmp = gen_reg_rtx (mode);
15558 /* Recurse to get the constant loaded. */
15559 if (ix86_expand_int_movcc (operands) == 0)
15560 return 0; /* FAIL */
15562 /* Mask in the interesting variable. */
15563 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15565 if (!rtx_equal_p (out, orig_out))
15566 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15568 return 1; /* DONE */
15572 * For comparison with above,
/* Final path: emit a real cmov.  Force both arms into registers or
   memory as the cmov pattern requires, taking care not to clobber
   operands the extra FP tests still need.  */
15582 if (! nonimmediate_operand (operands[2], mode))
15583 operands[2] = force_reg (mode, operands[2]);
15584 if (! nonimmediate_operand (operands[3], mode))
15585 operands[3] = force_reg (mode, operands[3]);
15587 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15589 rtx tmp = gen_reg_rtx (mode);
15590 emit_move_insn (tmp, operands[3]);
15593 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15595 rtx tmp = gen_reg_rtx (mode);
15596 emit_move_insn (tmp, operands[2]);
15600 if (! register_operand (operands[2], VOIDmode)
15602 || ! register_operand (operands[3], VOIDmode)))
15603 operands[2] = force_reg (mode, operands[2]);
15606 && ! register_operand (operands[3], VOIDmode))
15607 operands[3] = force_reg (mode, operands[3]);
15609 emit_insn (compare_seq);
15610 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15611 gen_rtx_IF_THEN_ELSE (mode,
15612 compare_op, operands[2],
/* Extra cmovs for the bypass/second FP tests, each conditionally
   overwriting the result already in operands[0].  */
15615 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15616 gen_rtx_IF_THEN_ELSE (mode,
15618 copy_rtx (operands[3]),
15619 copy_rtx (operands[0]))));
15621 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15622 gen_rtx_IF_THEN_ELSE (mode,
15624 copy_rtx (operands[2]),
15625 copy_rtx (operands[0]))));
15627 return 1; /* DONE */
15630 /* Swap, force into registers, or otherwise massage the two operands
15631 to an sse comparison with a mask result. Thus we differ a bit from
15632 ix86_prepare_fp_compare_args which expects to produce a flags result.
15634 The DEST operand exists to help determine whether to commute commutative
15635 operators. The POP0/POP1 operands are updated in place. The new
15636 comparison code is returned, or UNKNOWN if not implementable. */
/* Massage *POP0/*POP1 in place so CODE can be emitted as an SSE
   compare-with-mask.  DEST (may be NULL) is used only to decide
   whether commuting helps register allocation.  Returns the possibly
   swapped comparison code, or UNKNOWN when not implementable
   (e.g. LTGT, per the comment below).  */
15638 static enum rtx_code
15639 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15640 rtx *pop0, rtx *pop1)
15648 /* We have no LTGT as an operator. We could implement it with
15649 NE & ORDERED, but this requires an extra temporary. It's
15650 not clear that it's worth it. */
15657 /* These are supported directly. */
15664 /* For commutative operators, try to canonicalize the destination
15665 operand to be first in the comparison - this helps reload to
15666 avoid extra moves. */
15667 if (!dest || !rtx_equal_p (dest, *pop1))
15675 /* These are not supported directly. Swap the comparison operands
15676 to transform into something that is supported. */
15680 code = swap_condition (code);
15684 gcc_unreachable ();
15690 /* Detect conditional moves that exactly match min/max operational
15691 semantics. Note that this is IEEE safe, as long as we don't
15692 interchange the operands.
15694 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15695 and TRUE if the operation is successful and instructions are emitted. */
/* Try to emit DEST = (CMP_OP0 <op> CMP_OP1) ? IF_TRUE : IF_FALSE as a
   single SSE min/max.  Only matches when the selected arms are exactly
   the compared operands (in matching order), keeping IEEE semantics.
   Returns false when the shape does not match; true after emitting.  */
15698 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15699 rtx cmp_op1, rtx if_true, rtx if_false)
15701 enum machine_mode mode;
15707 else if (code == UNGE)
15710 if_true = if_false;
/* The min/max orientation is determined by which arm matches which
   compare operand.  */
15716 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15718 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15723 mode = GET_MODE (dest);
15725 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15726 but MODE may be a vector mode and thus not appropriate. */
15727 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE path: wrap in an UNSPEC so the optimizers cannot
   commute the operands (min/max are not commutative under NaN/-0).  */
15729 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15732 if_true = force_reg (mode, if_true);
15733 v = gen_rtvec (2, if_true, if_false);
15734 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed-math path: plain SMIN/SMAX rtx is sufficient.  */
15738 code = is_min ? SMIN : SMAX;
15739 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15742 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15746 /* Expand an sse vector comparison. Return the register with the result. */
/* Emit an SSE vector comparison CMP_OP0 <CODE> CMP_OP1 producing a
   full-width mask.  OP_TRUE/OP_FALSE are only inspected for overlap
   with DEST; a fresh register is used when DEST aliases any input.
   Returns the register holding the mask.  */
15749 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15750 rtx op_true, rtx op_false)
15752 enum machine_mode mode = GET_MODE (dest);
15755 cmp_op0 = force_reg (mode, cmp_op0);
15756 if (!nonimmediate_operand (cmp_op1, mode))
15757 cmp_op1 = force_reg (mode, cmp_op1);
/* Avoid clobbering values that are still needed as cmov arms.  */
15760 || reg_overlap_mentioned_p (dest, op_true)
15761 || reg_overlap_mentioned_p (dest, op_false))
15762 dest = gen_reg_rtx (mode);
15764 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15765 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15770 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15771 operations. This is used for both scalar and vector conditional moves. */
/* Emit DEST = CMP ? OP_TRUE : OP_FALSE where CMP is an all-ones/
   all-zeros mask, using AND/ANDN/OR logic (or SSE5 pcmov when
   available).  Used for both scalar and vector conditional moves.  */
15774 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15776 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero, a single AND with the mask works.  */
15779 if (op_false == CONST0_RTX (mode))
15781 op_true = force_reg (mode, op_true);
15782 x = gen_rtx_AND (mode, cmp, op_true);
15783 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero, ANDN (NOT mask & false) suffices.  */
15785 else if (op_true == CONST0_RTX (mode))
15787 op_false = force_reg (mode, op_false);
15788 x = gen_rtx_NOT (mode, cmp);
15789 x = gen_rtx_AND (mode, x, op_false);
15790 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move instruction.  */
15792 else if (TARGET_SSE5)
15794 rtx pcmov = gen_rtx_SET (mode, dest,
15795 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: (true & mask) | (false & ~mask).  */
15802 op_true = force_reg (mode, op_true);
15803 op_false = force_reg (mode, op_false);
15805 t2 = gen_reg_rtx (mode);
15807 t3 = gen_reg_rtx (mode);
15811 x = gen_rtx_AND (mode, op_true, cmp);
15812 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15814 x = gen_rtx_NOT (mode, cmp);
15815 x = gen_rtx_AND (mode, x, op_false);
15816 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15818 x = gen_rtx_IOR (mode, t3, t2);
15819 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15823 /* Expand a floating-point conditional move. Return true if successful. */
/* Expand a floating-point conditional move (movcc).  operands[0] is the
   destination, operands[1] the comparison, operands[2]/[3] the arms.
   Uses SSE mask logic when the mode is handled by SSE math, otherwise
   falls back to x87 fcmov.  Returns nonzero on success.  */
15826 ix86_expand_fp_movcc (rtx operands[])
15828 enum machine_mode mode = GET_MODE (operands[0]);
15829 enum rtx_code code = GET_CODE (operands[1]);
15830 rtx tmp, compare_op, second_test, bypass_test;
15832 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15834 enum machine_mode cmode;
15836 /* Since we've no cmove for sse registers, don't force bad register
15837 allocation just to gain access to it. Deny movcc when the
15838 comparison mode doesn't match the move mode. */
15839 cmode = GET_MODE (ix86_compare_op0);
15840 if (cmode == VOIDmode)
15841 cmode = GET_MODE (ix86_compare_op1);
15845 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15847 &ix86_compare_op1);
15848 if (code == UNKNOWN)
/* Prefer a single min/max instruction when the shape matches.  */
15851 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15852 ix86_compare_op1, operands[2],
/* Otherwise: SSE compare-to-mask followed by mask select.  */
15856 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15857 ix86_compare_op1, operands[2], operands[3]);
15858 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15862 /* The floating point conditional move instructions don't directly
15863 support conditions resulting from a signed integer comparison. */
15865 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15867 /* The floating point conditional move instructions don't directly
15868 support signed integer comparisons. */
15870 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce an unsupported condition to setcc into a QImode temp and
   re-compare that temp against zero, which fcmov can handle.  */
15872 gcc_assert (!second_test && !bypass_test);
15873 tmp = gen_reg_rtx (QImode);
15874 ix86_expand_setcc (code, tmp);
15876 ix86_compare_op0 = tmp;
15877 ix86_compare_op1 = const0_rtx;
15878 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that alias the destination before the extra tests run.  */
15880 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15882 tmp = gen_reg_rtx (mode);
15883 emit_move_insn (tmp, operands[3]);
15886 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15888 tmp = gen_reg_rtx (mode);
15889 emit_move_insn (tmp, operands[2]);
15893 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15894 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15895 operands[2], operands[3])));
15897 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15898 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15899 operands[3], operands[0])));
15901 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15902 gen_rtx_IF_THEN_ELSE (mode, second_test,
15903 operands[2], operands[0])));
15908 /* Expand a floating-point vector conditional move; a vcond operation
15909 rather than a movcc operation. */
/* Expand a floating-point vcond: operands[0] = dest, operands[1]/[2]
   the arms, operands[3] the comparison, operands[4]/[5] its operands.
   Mirrors the SSE branch of ix86_expand_fp_movcc for vectors.  */
15912 ix86_expand_fp_vcond (rtx operands[])
15914 enum rtx_code code = GET_CODE (operands[3]);
15917 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15918 &operands[4], &operands[5]);
15919 if (code == UNKNOWN)
15922 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15923 operands[5], operands[1], operands[2]))
15926 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15927 operands[1], operands[2]);
15928 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15932 /* Expand a signed/unsigned integral vector conditional move. */
/* Expand a signed/unsigned integral vector vcond.  Operand layout as
   in ix86_expand_fp_vcond.  Hardware only provides EQ/GT (signed), so
   other codes are canonicalized, possibly negating the selection.  */
15935 ix86_expand_int_vcond (rtx operands[])
15937 enum machine_mode mode = GET_MODE (operands[0]);
15938 enum rtx_code code = GET_CODE (operands[3]);
15939 bool negate = false;
15942 cop0 = operands[4];
15943 cop1 = operands[5];
15945 /* SSE5 supports all of the comparisons on all vector int types. */
15948 /* Canonicalize the comparison to EQ, GT, GTU. */
15959 code = reverse_condition (code);
15965 code = reverse_condition (code);
15971 code = swap_condition (code);
15972 x = cop0, cop0 = cop1, cop1 = x;
15976 gcc_unreachable ();
15979 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15980 if (mode == V2DImode)
15985 /* SSE4.1 supports EQ. */
15986 if (!TARGET_SSE4_1)
15992 /* SSE4.2 supports GT/GTU. */
15993 if (!TARGET_SSE4_2)
15998 gcc_unreachable ();
16002 /* Unsigned parallel compare is not supported by the hardware. Play some
16003 tricks to turn this into a signed comparison against 0. */
16006 cop0 = force_reg (mode, cop0);
16015 /* Perform a parallel modulo subtraction. */
16016 t1 = gen_reg_rtx (mode);
16017 emit_insn ((mode == V4SImode
16019 : gen_subv2di3) (t1, cop0, cop1));
16021 /* Extract the original sign bit of op0. */
16022 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16024 t2 = gen_reg_rtx (mode);
16025 emit_insn ((mode == V4SImode
16027 : gen_andv2di3) (t2, cop0, mask));
16029 /* XOR it back into the result of the subtraction. This results
16030 in the sign bit set iff we saw unsigned underflow. */
16031 x = gen_reg_rtx (mode);
16032 emit_insn ((mode == V4SImode
16034 : gen_xorv2di3) (x, t1, t2));
16042 /* Perform a parallel unsigned saturating subtraction. */
16043 x = gen_reg_rtx (mode);
16044 emit_insn (gen_rtx_SET (VOIDmode, x,
16045 gen_rtx_US_MINUS (mode, cop0, cop1)));
16052 gcc_unreachable ();
16056 cop1 = CONST0_RTX (mode);
/* NEGATE swaps which arm is selected by indexing operands[1]/[2]
   from opposite ends.  */
16060 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16061 operands[1+negate], operands[2-negate]);
16063 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16064 operands[2-negate]);
16068 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16069 true if we should do zero extension, else sign extension. HIGH_P is
16070 true if we want the N/2 high elements, else the low elements. */
/* Widen OPERANDS[1] into OPERANDS[0] via the SSE2 interleave insns.
   UNSIGNED_P selects zero vs. sign extension (the "se" vector is
   either zeros or a per-element sign mask from a GT-0 compare);
   HIGH_P selects the high or low half of the source elements.  */
16073 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16075 enum machine_mode imode = GET_MODE (operands[1]);
16076 rtx (*unpack)(rtx, rtx, rtx);
16083 unpack = gen_vec_interleave_highv16qi;
16085 unpack = gen_vec_interleave_lowv16qi;
16089 unpack = gen_vec_interleave_highv8hi;
16091 unpack = gen_vec_interleave_lowv8hi;
16095 unpack = gen_vec_interleave_highv4si;
16097 unpack = gen_vec_interleave_lowv4si;
16100 gcc_unreachable ();
16103 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend interleaves with 0; sign-extend interleaves with a mask
   of the source's sign bits (0 > x gives all-ones for negative x).  */
16106 se = force_reg (imode, CONST0_RTX (imode));
16108 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16109 operands[1], pc_rtx, pc_rtx);
16111 emit_insn (unpack (dest, operands[1], se));
16114 /* This function performs the same task as ix86_expand_sse_unpack,
16115 but with SSE4.1 instructions. */
/* Same task as ix86_expand_sse_unpack, using the SSE4.1 pmovzx/pmovsx
   extension instructions.  For HIGH_P the upper 8 bytes are first
   shifted down, since the extend insns read the low half only.  */
16118 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16120 enum machine_mode imode = GET_MODE (operands[1]);
16121 rtx (*unpack)(rtx, rtx);
16128 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16130 unpack = gen_sse4_1_extendv8qiv8hi2;
16134 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16136 unpack = gen_sse4_1_extendv4hiv4si2;
16140 unpack = gen_sse4_1_zero_extendv2siv2di2;
16142 unpack = gen_sse4_1_extendv2siv2di2;
16145 gcc_unreachable ();
16148 dest = operands[0];
16151 /* Shift higher 8 bytes to lower 8 bytes. */
16152 src = gen_reg_rtx (imode);
16153 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16154 gen_lowpart (TImode, operands[1]),
16160 emit_insn (unpack (dest, src));
16163 /* This function performs the same task as ix86_expand_sse_unpack,
16164 but with sse5 instructions. */
/* Same task as ix86_expand_sse_unpack, but built on the SSE5 pperm
   byte-permute: each destination byte is described by a PPERM_* selector
   (source byte index, or a sign/zero fill).  One case per source
   element width; each builds the 16-entry selector vector V and a
   parallel VS describing the selected source lanes.  */
16167 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16169 enum machine_mode imode = GET_MODE (operands[1]);
16170 int pperm_bytes[16];
16172 int h = (high_p) ? 8 : 0;
16175 rtvec v = rtvec_alloc (16);
16178 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each byte becomes source byte + sign/zero byte.  */
16183 vs = rtvec_alloc (8);
16184 h2 = (high_p) ? 8 : 0;
16185 for (i = 0; i < 8; i++)
16187 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16188 pperm_bytes[2*i+1] = ((unsigned_p)
16190 : PPERM_SIGN | PPERM_SRC2 | i | h);
16193 for (i = 0; i < 16; i++)
16194 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16196 for (i = 0; i < 8; i++)
16197 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16199 p = gen_rtx_PARALLEL (VOIDmode, vs);
16200 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16202 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16204 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes plus two extension bytes each.  */
16208 vs = rtvec_alloc (4);
16209 h2 = (high_p) ? 4 : 0;
16210 for (i = 0; i < 4; i++)
16212 sign_extend = ((unsigned_p)
16214 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16215 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16216 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16217 pperm_bytes[4*i+2] = sign_extend;
16218 pperm_bytes[4*i+3] = sign_extend;
16221 for (i = 0; i < 16; i++)
16222 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16224 for (i = 0; i < 4; i++)
16225 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16227 p = gen_rtx_PARALLEL (VOIDmode, vs);
16228 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16230 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16232 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes plus four extension bytes each.  */
16236 vs = rtvec_alloc (2);
16237 h2 = (high_p) ? 2 : 0;
16238 for (i = 0; i < 2; i++)
16240 sign_extend = ((unsigned_p)
16242 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16243 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16244 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16245 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16246 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16247 pperm_bytes[8*i+4] = sign_extend;
16248 pperm_bytes[8*i+5] = sign_extend;
16249 pperm_bytes[8*i+6] = sign_extend;
16250 pperm_bytes[8*i+7] = sign_extend;
16253 for (i = 0; i < 16; i++)
16254 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16256 for (i = 0; i < 2; i++)
16257 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16259 p = gen_rtx_PARALLEL (VOIDmode, vs);
16260 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16262 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16264 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16268 gcc_unreachable ();
16274 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16275 next narrower integer vector type */
/* Pack the elements of OPERANDS[1] (low half of result) and
   OPERANDS[2] (high half) into the next narrower vector type in
   OPERANDS[0], using an SSE5 pperm whose selector takes the low bytes
   of each wide element.  One case per destination element width.  */
16277 ix86_expand_sse5_pack (rtx operands[3])
16279 enum machine_mode imode = GET_MODE (operands[0]);
16280 int pperm_bytes[16];
16282 rtvec v = rtvec_alloc (16);
16284 rtx op0 = operands[0];
16285 rtx op1 = operands[1];
16286 rtx op2 = operands[2];
/* V8HI pair -> V16QI: take every even byte of each source.  */
16291 for (i = 0; i < 8; i++)
16293 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16294 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16297 for (i = 0; i < 16; i++)
16298 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16300 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16301 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI pair -> V8HI: take the low 2 bytes of each 4-byte element.  */
16305 for (i = 0; i < 4; i++)
16307 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16308 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16309 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16310 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16313 for (i = 0; i < 16; i++)
16314 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16316 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16317 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI pair -> V4SI: take the low 4 bytes of each 8-byte element.  */
16321 for (i = 0; i < 2; i++)
16323 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16324 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16325 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16326 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16327 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16328 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16329 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16330 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16333 for (i = 0; i < 16; i++)
16334 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16336 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16337 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16341 gcc_unreachable ();
16347 /* Expand conditional increment or decrement using adb/sbb instructions.
16348 The default case using setcc followed by the conditional move can be
16349 done by generic code. */
/* Expand a conditional +/-1 (addcc) as adc/sbb against the carry flag.
   operands[0] = dest, operands[1] = comparison, operands[2] = base
   value, operands[3] = const1_rtx or constm1_rtx.  Returns 1 when
   handled, 0 to let generic setcc+cmov code take over.  */
16351 ix86_expand_int_addcc (rtx operands[])
16353 enum rtx_code code = GET_CODE (operands[1]);
16355 rtx val = const0_rtx;
16356 bool fpcmp = false;
16357 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1 / -1 adjustments map onto adc/sbb with a 0 addend.  */
16359 if (operands[3] != const1_rtx
16360 && operands[3] != constm1_rtx)
16362 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16363 ix86_compare_op1, &compare_op))
16365 code = GET_CODE (compare_op);
16367 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16368 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16371 code = ix86_fp_compare_code_to_integer (code);
16378 PUT_CODE (compare_op,
16379 reverse_condition_maybe_unordered
16380 (GET_CODE (compare_op)));
16382 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16384 PUT_MODE (compare_op, mode);
16386 /* Construct either adc or sbb insn. */
/* sbb subtracts the carry; it implements LTU ? x-1 : x (and the GEU
   dual), dispatched per destination mode.  */
16387 if ((code == LTU) == (operands[3] == constm1_rtx))
16389 switch (GET_MODE (operands[0]))
16392 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16395 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16398 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16401 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16404 gcc_unreachable ();
/* Otherwise adc adds the carry.  */
16409 switch (GET_MODE (operands[0]))
16412 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16415 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16418 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16421 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16424 gcc_unreachable ();
16427 return 1; /* DONE */
16431 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16432 works for floating pointer parameters and nonoffsetable memories.
16433 For pushes, it returns just stack offsets; the values will be saved
16434 in the right order. Maximally three parts are generated. */
/* Split OPERAND into word-sized PARTS for MODE (SImode words on ia32,
   DImode on 64-bit; XFmode counts as 3/2 parts).  Handles registers,
   offsettable memory, pushes (stack offsets only), CONST_VECTOR and
   CONST_DOUBLE.  Returns the number of parts produced (2..4).  */
16437 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16442 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16444 size = (GET_MODE_SIZE (mode) + 4) / 8;
16446 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16447 gcc_assert (size >= 2 && size <= 4);
16449 /* Optimize constant pool reference to immediates. This is used by fp
16450 moves, that force all constants to memory to allow combining. */
16451 if (MEM_P (operand) && MEM_READONLY_P (operand))
16453 rtx tmp = maybe_get_pool_constant (operand);
16458 if (MEM_P (operand) && !offsettable_memref_p (operand))
16460 /* The only non-offsetable memories we handle are pushes. */
16461 int ok = push_operand (operand, VOIDmode);
/* For a push every part is the same pre-dec address; the caller
   emits the pushes in the right order.  */
16465 operand = copy_rtx (operand);
16466 PUT_MODE (operand, Pmode);
16467 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16471 if (GET_CODE (operand) == CONST_VECTOR)
16473 enum machine_mode imode = int_mode_for_mode (mode);
16474 /* Caution: if we looked through a constant pool memory above,
16475 the operand may actually have a different mode now. That's
16476 ok, since we want to pun this all the way back to an integer. */
16477 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16478 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode words.  */
16484 if (mode == DImode)
16485 split_di (&operand, 1, &parts[0], &parts[1]);
16490 if (REG_P (operand))
/* Hard registers only exist after reload; use consecutive regnos.  */
16492 gcc_assert (reload_completed);
16493 for (i = 0; i < size; i++)
16494 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16496 else if (offsettable_memref_p (operand))
16498 operand = adjust_address (operand, SImode, 0);
16499 parts[0] = operand;
16500 for (i = 1; i < size; i++)
16501 parts[i] = adjust_address (operand, SImode, 4 * i);
16503 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an FP constant into its target word images.  */
16508 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16512 real_to_target (l, &r, mode);
16513 parts[3] = gen_int_mode (l[3], SImode);
16514 parts[2] = gen_int_mode (l[2], SImode);
16517 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16518 parts[2] = gen_int_mode (l[2], SImode);
16521 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16524 gcc_unreachable ();
16526 parts[1] = gen_int_mode (l[1], SImode);
16527 parts[0] = gen_int_mode (l[0], SImode);
16530 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode tail for XFmode).  */
16535 if (mode == TImode)
16536 split_ti (&operand, 1, &parts[0], &parts[1]);
16537 if (mode == XFmode || mode == TFmode)
16539 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16540 if (REG_P (operand))
16542 gcc_assert (reload_completed);
16543 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16544 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16546 else if (offsettable_memref_p (operand))
16548 operand = adjust_address (operand, DImode, 0);
16549 parts[0] = operand;
16550 parts[1] = adjust_address (operand, upper_mode, 8);
16552 else if (GET_CODE (operand) == CONST_DOUBLE)
16557 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16558 real_to_target (l, &r, mode);
16560 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16561 if (HOST_BITS_PER_WIDE_INT >= 64)
16564 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16565 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16568 parts[0] = immed_double_const (l[0], l[1], DImode);
16570 if (upper_mode == SImode)
16571 parts[1] = gen_int_mode (l[2], SImode);
16572 else if (HOST_BITS_PER_WIDE_INT >= 64)
16575 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16576 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16579 parts[1] = immed_double_const (l[2], l[3], DImode);
16582 gcc_unreachable ();
16589 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16590 Return false when normal moves are needed; true when all required
16591 insns have been emitted. Operands 2-4 contain the input values
16592 int the correct order; operands 5-7 contain the output values. */
/* Split a multi-word move (DI/DF on 32-bit, TI/XF/TF) into word-sized moves,
   filling operands[2..5] (dests) and operands[6..9] (sources).
   NOTE(review): this dump elides interior lines (braces, some statements);
   comments annotate only what is visible here.  */
16595 ix86_split_long_move (rtx operands[])
16600   int collisions = 0;
16601   enum machine_mode mode = GET_MODE (operands[0]);
16602   bool collisionparts[4];
16604   /* The DFmode expanders may ask us to move double.
16605      For 64bit target this is single move.  By hiding the fact
16606      here we simplify i386.md splitters.  */
16607   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16609       /* Optimize constant pool reference to immediates.  This is used by
16610	 fp moves, that force all constants to memory to allow combining.  */
16612       if (MEM_P (operands[1])
16613	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16614	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16615	operands[1] = get_pool_constant (XEXP (operands[1], 0));
16616       if (push_operand (operands[0], VOIDmode))
16618	  operands[0] = copy_rtx (operands[0]);
16619	  PUT_MODE (operands[0], Pmode);
16622	  operands[0] = gen_lowpart (DImode, operands[0]);
16623	  operands[1] = gen_lowpart (DImode, operands[1]);
16624	  emit_move_insn (operands[0], operands[1]);
16628   /* The only non-offsettable memory we handle is push.  */
16629   if (push_operand (operands[0], VOIDmode))
16632     gcc_assert (!MEM_P (operands[0])
16633		|| offsettable_memref_p (operands[0]));
  /* Decompose both operands into word-sized parts; nparts is 2, 3 or 4.  */
16635   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16636   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16638   /* When emitting push, take care for source operands on the stack.  */
16639   if (push && MEM_P (operands[1])
16640       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16641     for (i = 0; i < nparts - 1; i++)
16642       part[1][i] = change_address (part[1][i],
16643				   GET_MODE (part[1][i]),
16644				   XEXP (part[1][i + 1], 0));
16646   /* We need to do copy in the right order in case an address register
16647      of the source overlaps the destination.  */
16648   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
  /* Count which destination parts clobber the source address register.  */
16652       for (i = 0; i < nparts; i++)
16655	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16656	  if (collisionparts[i])
16660       /* Collision in the middle part can be handled by reordering.  */
16661       if (collisions == 1 && nparts == 3 && collisionparts [1])
16663	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16664	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16666       else if (collisions == 1
16668	       && (collisionparts [1] || collisionparts [2]))
16670	  if (collisionparts [1])
16672	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16673	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16677	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16678	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16682       /* If there are more collisions, we can't handle it by reordering.
16683	 Do an lea to the last part and use only one colliding move.  */
16684       else if (collisions > 1)
16690	  base = part[0][nparts - 1];
16692	  /* Handle the case when the last part isn't valid for lea.
16693	     Happens in 64-bit mode storing the 12-byte XFmode.  */
16694	  if (GET_MODE (base) != Pmode)
16695	    base = gen_rtx_REG (Pmode, REGNO (base));
	  /* Materialize the source address in BASE, then rewrite every
	     source part to be BASE-relative so only this move collides.  */
16697	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16698	  part[1][0] = replace_equiv_address (part[1][0], base);
16699	  for (i = 1; i < nparts; i++)
16701	      tmp = plus_constant (base, UNITS_PER_WORD * i);
16702	      part[1][i] = replace_equiv_address (part[1][i], tmp);
  /* Push handling: XFmode pushes on 32-bit need 4 extra bytes of stack.  */
16713	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16714		emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16715	      emit_move_insn (part[0][2], part[1][2]);
16717	  else if (nparts == 4)
16719	      emit_move_insn (part[0][3], part[1][3]);
16720	      emit_move_insn (part[0][2], part[1][2]);
16725	  /* In 64bit mode we don't have 32bit push available.  In case this is
16726	     register, it is OK - we will just use larger counterpart.  We also
16727	     retype memory - these comes from attempt to avoid REX prefix on
16728	     moving of second half of TFmode value.  */
16729	  if (GET_MODE (part[1][1]) == SImode)
16731	      switch (GET_CODE (part[1][1]))
16734		  part[1][1] = adjust_address (part[1][1], DImode, 0);
16738		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16742		  gcc_unreachable ();
16745	      if (GET_MODE (part[1][0]) == SImode)
16746		part[1][0] = part[1][1];
16749       emit_move_insn (part[0][1], part[1][1]);
16750       emit_move_insn (part[0][0], part[1][0]);
16754   /* Choose correct order to not overwrite the source before it is copied.  */
16755   if ((REG_P (part[0][0])
16756        && REG_P (part[1][1])
16757        && (REGNO (part[0][0]) == REGNO (part[1][1])
16759	       && REGNO (part[0][0]) == REGNO (part[1][2]))
16761	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
16763	   && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
      /* Copy high part first (reverse order).  */
16765       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16767	  operands[2 + i] = part[0][j];
16768	  operands[6 + i] = part[1][j];
16773       for (i = 0; i < nparts; i++)
16775	  operands[2 + i] = part[0][i];
16776	  operands[6 + i] = part[1][i];
16780   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16781   if (optimize_insn_for_size_p ())
16783       for (j = 0; j < nparts - 1; j++)
16784	if (CONST_INT_P (operands[6 + j])
16785	    && operands[6 + j] != const0_rtx
16786	    && REG_P (operands[2 + j]))
16787	  for (i = j; i < nparts - 1; i++)
16788	    if (CONST_INT_P (operands[7 + i])
16789		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      /* Reuse the register just loaded with this constant.  */
16790	      operands[7 + i] = operands[2 + j];
16793   for (i = 0; i < nparts; i++)
16794     emit_move_insn (operands[2 + i], operands[6 + i]);
16799 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16800 left shift by a constant, either using a single shift or
16801 a sequence of add instructions. */
/* Emit OPERAND <<= COUNT in MODE, as one shift or as COUNT self-adds
   when the adds are cheaper by the cost model (and not optimizing for
   size).  NOTE(review): some lines are elided in this dump.  */
16804 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
  /* count == 1: a single add doubles the value.  */
16808       emit_insn ((mode == DImode
16810		 : gen_adddi3) (operand, operand, operand));
16812   else if (!optimize_insn_for_size_p ()
16813	   && count * ix86_cost->add <= ix86_cost->shift_const)
16816       for (i=0; i<count; i++)
16818	  emit_insn ((mode == DImode
16820		     : gen_adddi3) (operand, operand, operand));
  /* Otherwise a plain shift-by-immediate.  */
16824     emit_insn ((mode == DImode
16826		: gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode pair on 32-bit, TImode pair on
   64-bit) into single-word operations.  SCRATCH, if non-NULL, enables the
   cmove-based variable-count adjustment.
   NOTE(review): interior lines are elided in this dump.  */
16830 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16832   rtx low[2], high[2];
16834   const int single_width = mode == DImode ? 32 : 64;
16836   if (CONST_INT_P (operands[2]))
16838       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      /* Shift count is taken modulo twice the word width.  */
16839       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16841       if (count >= single_width)
	  /* Whole low word shifts into the high word; low becomes zero.  */
16843	  emit_move_insn (high[0], low[1]);
16844	  emit_move_insn (low[0], const0_rtx);
16846	  if (count > single_width)
16847	    ix86_expand_ashl_const (high[0], count - single_width, mode);
16851	  if (!rtx_equal_p (operands[0], operands[1]))
16852	    emit_move_insn (operands[0], operands[1]);
	  /* shld feeds low-word bits into the high word, then shift low.  */
16853	  emit_insn ((mode == DImode
16855		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16856	  ix86_expand_ashl_const (low[0], count, mode);
16861   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16863   if (operands[1] == const1_rtx)
16865       /* Assuming we've chosen a QImode capable registers, then 1 << N
16866	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16867       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16869	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
	  /* Set exactly one of low/high to 1 via setcc on the ZF test
	     of the count's word-select bit, then shift both.  */
16871	  ix86_expand_clear (low[0]);
16872	  ix86_expand_clear (high[0]);
16873	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16875	  d = gen_lowpart (QImode, low[0]);
16876	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16877	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
16878	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
16880	  d = gen_lowpart (QImode, high[0]);
16881	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16882	  s = gen_rtx_NE (QImode, flags, const0_rtx);
16883	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
16886       /* Otherwise, we can get the same results by manually performing
16887	 a bit extract operation on bit 5/6, and then performing the two
16888	 shifts.  The two methods of getting 0/1 into low/high are exactly
16889	 the same size.  Avoiding the shift in the bit extract case helps
16890	 pentium4 a bit; no one else seems to care much either way.  */
16895       if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16896	x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16898	x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16899       emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
      /* Extract the word-select bit (bit 5 for DImode, 6 for TImode).  */
16901       emit_insn ((mode == DImode
16903		 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16904       emit_insn ((mode == DImode
16906		 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16907       emit_move_insn (low[0], high[0]);
16908       emit_insn ((mode == DImode
16910		 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16913       emit_insn ((mode == DImode
16915		 : gen_ashldi3) (low[0], low[0], operands[2]));
16916       emit_insn ((mode == DImode
16918		 : gen_ashldi3) (high[0], high[0], operands[2]));
16922   if (operands[1] == constm1_rtx)
16924       /* For -1 << N, we can avoid the shld instruction, because we
16925	 know that we're shifting 0...31/63 ones into a -1.  */
16926       emit_move_insn (low[0], constm1_rtx);
16927       if (optimize_insn_for_size_p ())
16928	emit_move_insn (high[0], low[0]);
16930	emit_move_insn (high[0], constm1_rtx);
  /* General variable-count case.  */
16934       if (!rtx_equal_p (operands[0], operands[1]))
16935	emit_move_insn (operands[0], operands[1]);
16937       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16938       emit_insn ((mode == DImode
16940		 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16943   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
  /* Fix up for counts >= single_width: with cmove use the scratch
     register, otherwise emit the branching adjustment pattern.  */
16945   if (TARGET_CMOVE && scratch)
16947       ix86_expand_clear (scratch);
16948       emit_insn ((mode == DImode
16949		 ? gen_x86_shift_adj_1
16950		 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16954     emit_insn ((mode == DImode
16955		? gen_x86_shift_adj_2
16956		: gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word operations.
   Mirrors ix86_split_ashl; sign bits are replicated via an arithmetic
   shift by single_width - 1.
   NOTE(review): interior lines are elided in this dump.  */
16960 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16962   rtx low[2], high[2];
16964   const int single_width = mode == DImode ? 32 : 64;
16966   if (CONST_INT_P (operands[2]))
16968       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16969       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16971       if (count == single_width * 2 - 1)
	  /* Shifting by all-but-one bit leaves just the sign in both words.  */
16973	  emit_move_insn (high[0], high[1]);
16974	  emit_insn ((mode == DImode
16976		     : gen_ashrdi3) (high[0], high[0],
16977				     GEN_INT (single_width - 1)));
16978	  emit_move_insn (low[0], high[0]);
16981       else if (count >= single_width)
	  /* High word moves to low; high word becomes sign extension.  */
16983	  emit_move_insn (low[0], high[1]);
16984	  emit_move_insn (high[0], low[0]);
16985	  emit_insn ((mode == DImode
16987		     : gen_ashrdi3) (high[0], high[0],
16988				     GEN_INT (single_width - 1)));
16989	  if (count > single_width)
16990	    emit_insn ((mode == DImode
16992		       : gen_ashrdi3) (low[0], low[0],
16993				       GEN_INT (count - single_width)));
16997	  if (!rtx_equal_p (operands[0], operands[1]))
16998	    emit_move_insn (operands[0], operands[1]);
	  /* shrd feeds high-word bits into the low word.  */
16999	  emit_insn ((mode == DImode
17001		     : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17002	  emit_insn ((mode == DImode
17004		     : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
  /* Variable-count case.  */
17009       if (!rtx_equal_p (operands[0], operands[1]))
17010	emit_move_insn (operands[0], operands[1]);
17012       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17014       emit_insn ((mode == DImode
17016		 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17017       emit_insn ((mode == DImode
17019		 : gen_ashrdi3) (high[0], high[0], operands[2]));
17021       if (TARGET_CMOVE && scratch)
	  /* Precompute the sign word in SCRATCH for the cmove fixup.  */
17023	  emit_move_insn (scratch, high[0]);
17024	  emit_insn ((mode == DImode
17026		     : gen_ashrdi3) (scratch, scratch,
17027				     GEN_INT (single_width - 1)));
17028	  emit_insn ((mode == DImode
17029		     ? gen_x86_shift_adj_1
17030		     : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17034	emit_insn ((mode == DImode
17035		   ? gen_x86_shift_adj_3
17036		   : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations.
   Like ix86_split_ashr but zero-fills instead of sign-extending.
   NOTE(review): interior lines are elided in this dump.  */
17041 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17043   rtx low[2], high[2];
17045   const int single_width = mode == DImode ? 32 : 64;
17047   if (CONST_INT_P (operands[2]))
17049       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17050       count = INTVAL (operands[2]) & (single_width * 2 - 1);
17052       if (count >= single_width)
	  /* High word moves to low; high word becomes zero.  */
17054	  emit_move_insn (low[0], high[1]);
17055	  ix86_expand_clear (high[0]);
17057	  if (count > single_width)
17058	    emit_insn ((mode == DImode
17060		       : gen_lshrdi3) (low[0], low[0],
17061				       GEN_INT (count - single_width)));
17065	  if (!rtx_equal_p (operands[0], operands[1]))
17066	    emit_move_insn (operands[0], operands[1]);
17067	  emit_insn ((mode == DImode
17069		     : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17070	  emit_insn ((mode == DImode
17072		     : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
  /* Variable-count case.  */
17077       if (!rtx_equal_p (operands[0], operands[1]))
17078	emit_move_insn (operands[0], operands[1]);
17080       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17082       emit_insn ((mode == DImode
17084		 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17085       emit_insn ((mode == DImode
17087		 : gen_lshrdi3) (high[0], high[0], operands[2]));
17089       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
17090       if (TARGET_CMOVE && scratch)
17092	  ix86_expand_clear (scratch);
17093	  emit_insn ((mode == DImode
17094		     ? gen_x86_shift_adj_1
17095		     : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17099	emit_insn ((mode == DImode
17100		   ? gen_x86_shift_adj_2
17101		   : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17105 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* Attach a REG_BR_PROB note with probability PROB (scaled by
   REG_BR_PROB_BASE) to the jump insn just emitted.  Asserts that the
   last insn really is a jump.  NOTE(review): the note-attachment lines
   are partially elided in this dump.  */
17107 predict_jump (int prob)
17109   rtx insn = get_last_insn ();
17110   gcc_assert (JUMP_P (insn));
17112     = gen_rtx_EXPR_LIST (REG_BR_PROB,
17117 /* Helper function for the string operations below. Dest VARIABLE whether
17118 it is aligned to VALUE bytes. If true, jump to the label. */
/* Test whether VARIABLE has any of the low bits in VALUE set (i.e. is
   NOT aligned to VALUE+1); emit a jump to a fresh label taken when the
   masked bits are zero, and return that label.  EPILOGUE selects the
   branch-probability hint (50% vs 90%).  */
17120 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17122   rtx label = gen_label_rtx ();
17123   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17124   if (GET_MODE (variable) == DImode)
17125     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17127     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17128   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
  /* Epilogue tests are coin-flips; prologue alignment tests usually pass.  */
17131     predict_jump (REG_BR_PROB_BASE * 50 / 100);
17133     predict_jump (REG_BR_PROB_BASE * 90 / 100);
17137 /* Adjust COUNTER by the VALUE. */
/* Subtract VALUE from COUNTREG in place, using the add pattern matching
   the counter's mode (DImode vs SImode).  */
17139 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17141   if (GET_MODE (countreg) == DImode)
17142     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17144     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17147 /* Zero extend possibly SImode EXP to Pmode register. */
/* Return a fresh Pmode register holding EXP zero-extended to Pmode.
   VOIDmode constants are just forced into a Pmode register; values
   already in Pmode are copied; otherwise a SImode->DImode zero-extend
   is emitted (only reached when Pmode is DImode).  */
17149 ix86_zero_extend_to_Pmode (rtx exp)
17152   if (GET_MODE (exp) == VOIDmode)
17153     return force_reg (Pmode, exp);
17154   if (GET_MODE (exp) == Pmode)
17155     return copy_to_mode_reg (Pmode, exp);
17156   r = gen_reg_rtx (Pmode);
17157   emit_insn (gen_zero_extendsidi2 (r, exp));
17161 /* Divide COUNTREG by SCALE. */
/* Return COUNTREG divided by SCALE (a power of two): a folded constant
   for CONST_INT counts, otherwise an emitted logical right shift by
   log2(SCALE).  NOTE(review): some lines are elided in this dump.  */
17163 scale_counter (rtx countreg, int scale)
17166   rtx piece_size_mask;
  /* scale == 1 presumably returns countreg unchanged (line elided).  */
17170   if (CONST_INT_P (countreg))
17171     return GEN_INT (INTVAL (countreg) / scale);
17172   gcc_assert (REG_P (countreg));
17174   piece_size_mask = GEN_INT (scale - 1);
17175   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17176			    GEN_INT (exact_log2 (scale)),
17177			    NULL, 1, OPTAB_DIRECT);
17181 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17182 DImode for constant loop counts. */
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts that fit in 32 bits; non-constant
   counts keep their own mode.  NOTE(review): the return statements for
   the constant cases are elided in this dump.  */
17184 static enum machine_mode
17185 counter_mode (rtx count_exp)
17187   if (GET_MODE (count_exp) != VOIDmode)
17188     return GET_MODE (count_exp);
17189   if (GET_CODE (count_exp) != CONST_INT)
17191   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17196 /* When SRCPTR is non-NULL, output simple loop to move memory
17197 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17198 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17199 equivalent loop to set memory by VALUE (supposed to be in MODE).
17201 The size is rounded down to whole number of chunk size moved at once.
17202 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* Emit a copy (SRCPTR non-NULL) or set (SRCPTR NULL, store VALUE) loop
   moving UNROLL chunks of MODE per iteration; COUNT is rounded down to a
   multiple of the chunk size and both pointers are advanced past the
   copied region on exit.  NOTE(review): interior lines are elided in
   this dump.  */
17206 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17207			       rtx destptr, rtx srcptr, rtx value,
17208			       rtx count, enum machine_mode mode, int unroll,
17211   rtx out_label, top_label, iter, tmp;
17212   enum machine_mode iter_mode = counter_mode (count);
17213   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17214   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17220   top_label = gen_label_rtx ();
17221   out_label = gen_label_rtx ();
17222   iter = gen_reg_rtx (iter_mode);
  /* SIZE = COUNT rounded down to a whole number of chunks.  */
17224   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17225			      NULL, 1, OPTAB_DIRECT);
17226   /* Those two should combine.  */
17227   if (piece_size == const1_rtx)
17229       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17231       predict_jump (REG_BR_PROB_BASE * 10 / 100);
17233   emit_move_insn (iter, const0_rtx);
17235   emit_label (top_label);
  /* Address each chunk as ptr + iter.  */
17237   tmp = convert_modes (Pmode, iter_mode, iter, true);
17238   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17239   destmem = change_address (destmem, mode, x_addr);
17243       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17244       srcmem = change_address (srcmem, mode, y_addr);
17246       /* When unrolling for chips that reorder memory reads and writes,
17247	 we can save registers by using single temporary.
17248	 Also using 4 temporaries is overkill in 32bit mode.  */
      /* NOTE(review): "&& 0" deliberately disables this single-temporary
	 variant; the all-loads-then-all-stores path below is used.  */
17249       if (!TARGET_64BIT && 0)
17251	  for (i = 0; i < unroll; i++)
17256		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17258		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17260	      emit_move_insn (destmem, srcmem);
17266	  gcc_assert (unroll <= 4);
	  /* Load all chunks into temporaries first, then store them,
	     to decouple the loads from the stores.  */
17267	  for (i = 0; i < unroll; i++)
17269	      tmpreg[i] = gen_reg_rtx (mode);
17273		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17275	      emit_move_insn (tmpreg[i], srcmem);
17277	  for (i = 0; i < unroll; i++)
17282		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17284	      emit_move_insn (destmem, tmpreg[i]);
  /* memset path: store VALUE into each chunk.  */
17289       for (i = 0; i < unroll; i++)
17293	      adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17294	  emit_move_insn (destmem, value);
17297   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17298			     true, OPTAB_LIB_WIDEN);
17300     emit_move_insn (iter, tmp);
17302   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
  /* Derive the back-edge probability from the expected trip count.  */
17304   if (expected_size != -1)
17306       expected_size /= GET_MODE_SIZE (mode) * unroll;
17307       if (expected_size == 0)
17309       else if (expected_size > REG_BR_PROB_BASE)
17310	predict_jump (REG_BR_PROB_BASE - 1);
17312	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17315     predict_jump (REG_BR_PROB_BASE * 80 / 100);
  /* Advance the pointers past the region processed by the loop.  */
17316   iter = ix86_zero_extend_to_Pmode (iter);
17317   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17318			     true, OPTAB_LIB_WIDEN);
17319   if (tmp != destptr)
17320     emit_move_insn (destptr, tmp);
17323       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17324				 true, OPTAB_LIB_WIDEN);
17326	emit_move_insn (srcptr, tmp);
17328   emit_label (out_label);
17331 /* Output "rep; mov" instruction.
17332 Arguments have same meaning as for previous function */
/* Emit a "rep; movs{b,w,l,q}" copy of COUNT bytes from SRCMEM to DESTMEM
   in chunks of MODE; the ASHIFT/PLUS expressions describe the final
   pointer values for the rep_mov pattern.  NOTE(review): some lines are
   elided in this dump.  */
17334 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17335			   rtx destptr, rtx srcptr,
17337			   enum machine_mode mode)
17343   /* If the size is known, it is shorter to use rep movs.  */
17344   if (mode == QImode && CONST_INT_P (count)
17345       && !(INTVAL (count) & 3))
  /* Normalize both MEMs to BLKmode at their pointer registers.  */
17348   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17349     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17350   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17351     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17352   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17353   if (mode != QImode)
      /* Final pointers = ptr + (countreg << log2(chunk size)).  */
17355       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17356				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17357       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17358       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17359			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17360       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17364       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17365       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
  /* Record precise MEM sizes when COUNT is constant, else drop any
     stale size info so aliasing stays conservative.  */
17367   if (CONST_INT_P (count))
17369       count = GEN_INT (INTVAL (count)
17370		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17371       destmem = shallow_copy_rtx (destmem);
17372       srcmem = shallow_copy_rtx (srcmem);
17373       set_mem_size (destmem, count);
17374       set_mem_size (srcmem, count);
17378       if (MEM_SIZE (destmem))
17379	set_mem_size (destmem, NULL_RTX);
17380       if (MEM_SIZE (srcmem))
17381	set_mem_size (srcmem, NULL_RTX);
17383   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17387 /* Output "rep; stos" instruction.
17388 Arguments have same meaning as for previous function */
/* Emit a "rep; stos" fill of COUNT bytes at DESTMEM with VALUE in chunks
   of MODE; DESTEXP describes the final destination pointer for the
   rep_stos pattern.  NOTE(review): some lines are elided in this dump.  */
17390 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17391			    rtx count, enum machine_mode mode,
17397   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17398     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17399   value = force_reg (mode, gen_lowpart (mode, value));
17400   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17401   if (mode != QImode)
17403       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17404				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17405       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17408     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  /* Only a constant zero fill with known count keeps exact MEM size info.  */
17409   if (orig_value == const0_rtx && CONST_INT_P (count))
17411       count = GEN_INT (INTVAL (count)
17412		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17413       destmem = shallow_copy_rtx (destmem);
17414       set_mem_size (destmem, count);
17416   else if (MEM_SIZE (destmem))
17417     set_mem_size (destmem, NULL_RTX);
17418   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single movs-style copy of one MODE-sized piece at byte OFFSET
   from SRCMEM to DESTMEM; the strmov pattern also advances DESTPTR and
   SRCPTR by the piece size.  */
17422 emit_strmov (rtx destmem, rtx srcmem,
17423	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17425   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17426   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17427   emit_insn (gen_strmov (destptr, dest, srcptr, src));
17430 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Output code to copy at most count & (max_size - 1) remaining bytes
   from SRC to DEST after a bulk copy.  Constant counts unroll into
   descending power-of-two piece moves; variable counts test each bit of
   COUNT and branch around the corresponding piece move.
   NOTE(review): interior lines are elided in this dump.  */
17432 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17433			rtx destptr, rtx srcptr, rtx count, int max_size)
17436   if (CONST_INT_P (count))
17438       HOST_WIDE_INT countval = INTVAL (count);
      /* Handle residues of 16, 8, 4, 2, 1 bytes in turn.  */
17441       if ((countval & 0x10) && max_size > 16)
17445	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17446	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17449	    gcc_unreachable ();
17452       if ((countval & 0x08) && max_size > 8)
17455	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17458	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17459	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17463       if ((countval & 0x04) && max_size > 4)
17465	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17468       if ((countval & 0x02) && max_size > 2)
17470	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17473       if ((countval & 0x01) && max_size > 1)
17475	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
  /* Large variable residue: fall back to a byte loop.  */
17482       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17483				   count, 1, OPTAB_DIRECT);
17484       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17485				     count, QImode, 1, 4);
17489   /* When there are stringops, we can cheaply increase dest and src pointers.
17490      Otherwise we save code size by maintaining offset (zero is readily
17491      available from preceding rep operation) and using x86 addressing modes.
17493   if (TARGET_SINGLE_STRINGOP)
      /* strmov variant: each piece move also advances the pointers.  */
17497	  rtx label = ix86_expand_aligntest (count, 4, true);
17498	  src = change_address (srcmem, SImode, srcptr);
17499	  dest = change_address (destmem, SImode, destptr);
17500	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17501	  emit_label (label);
17502	  LABEL_NUSES (label) = 1;
17506	  rtx label = ix86_expand_aligntest (count, 2, true);
17507	  src = change_address (srcmem, HImode, srcptr);
17508	  dest = change_address (destmem, HImode, destptr);
17509	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17510	  emit_label (label);
17511	  LABEL_NUSES (label) = 1;
17515	  rtx label = ix86_expand_aligntest (count, 1, true);
17516	  src = change_address (srcmem, QImode, srcptr);
17517	  dest = change_address (destmem, QImode, destptr);
17518	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
17519	  emit_label (label);
17520	  LABEL_NUSES (label) = 1;
      /* Offset-register variant: plain moves addressed as ptr + offset.  */
17525       rtx offset = force_reg (Pmode, const0_rtx);
17530	  rtx label = ix86_expand_aligntest (count, 4, true);
17531	  src = change_address (srcmem, SImode, srcptr);
17532	  dest = change_address (destmem, SImode, destptr);
17533	  emit_move_insn (dest, src);
17534	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17535				     true, OPTAB_LIB_WIDEN);
17537	    emit_move_insn (offset, tmp);
17538	  emit_label (label);
17539	  LABEL_NUSES (label) = 1;
17543	  rtx label = ix86_expand_aligntest (count, 2, true);
17544	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17545	  src = change_address (srcmem, HImode, tmp);
17546	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17547	  dest = change_address (destmem, HImode, tmp);
17548	  emit_move_insn (dest, src);
17549	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17550				     true, OPTAB_LIB_WIDEN);
17552	    emit_move_insn (offset, tmp);
17553	  emit_label (label);
17554	  LABEL_NUSES (label) = 1;
17558	  rtx label = ix86_expand_aligntest (count, 1, true);
17559	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17560	  src = change_address (srcmem, QImode, tmp);
17561	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17562	  dest = change_address (destmem, QImode, tmp);
17563	  emit_move_insn (dest, src);
17564	  emit_label (label);
17565	  LABEL_NUSES (label) = 1;
17570 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Set the residual count & (max_size - 1) bytes at DESTMEM via a byte
   loop: mask COUNT down, then reuse the generic set/move loop expander
   in QImode with the low byte of VALUE.  */
17572 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17573				 rtx count, int max_size)
17576     expand_simple_binop (counter_mode (count), AND, count,
17577			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17578   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17579				 gen_lowpart (QImode, value), count, QImode,
17583 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Output code to set at most count & (max_size - 1) residual bytes at
   DEST to VALUE.  Constant counts unroll into power-of-two piece stores;
   variable counts test each bit of COUNT and branch around the stores.
   NOTE(review): interior lines are elided in this dump.  */
17585 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17589   if (CONST_INT_P (count))
17591       HOST_WIDE_INT countval = INTVAL (count);
      /* Handle residues of 16, 8, 4, 2, 1 bytes in turn.  */
17594       if ((countval & 0x10) && max_size > 16)
17598	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17599	      emit_insn (gen_strset (destptr, dest, value));
17600	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17601	      emit_insn (gen_strset (destptr, dest, value));
17604	    gcc_unreachable ();
17607       if ((countval & 0x08) && max_size > 8)
17611	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17612	      emit_insn (gen_strset (destptr, dest, value));
17616	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17617	      emit_insn (gen_strset (destptr, dest, value));
17618	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17619	      emit_insn (gen_strset (destptr, dest, value));
17623       if ((countval & 0x04) && max_size > 4)
17625	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17626	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17629       if ((countval & 0x02) && max_size > 2)
17631	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17632	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17635       if ((countval & 0x01) && max_size > 1)
17637	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17638	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
  /* Large variable residue: fall back to a byte loop.  */
17645       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
  /* Variable count: test each bit of COUNT and conditionally store.  */
17650       rtx label = ix86_expand_aligntest (count, 16, true);
17653	  dest = change_address (destmem, DImode, destptr);
17654	  emit_insn (gen_strset (destptr, dest, value));
17655	  emit_insn (gen_strset (destptr, dest, value));
17659	  dest = change_address (destmem, SImode, destptr);
17660	  emit_insn (gen_strset (destptr, dest, value));
17661	  emit_insn (gen_strset (destptr, dest, value));
17662	  emit_insn (gen_strset (destptr, dest, value));
17663	  emit_insn (gen_strset (destptr, dest, value));
17665       emit_label (label);
17666       LABEL_NUSES (label) = 1;
17670       rtx label = ix86_expand_aligntest (count, 8, true);
17673	  dest = change_address (destmem, DImode, destptr);
17674	  emit_insn (gen_strset (destptr, dest, value));
17678	  dest = change_address (destmem, SImode, destptr);
17679	  emit_insn (gen_strset (destptr, dest, value));
17680	  emit_insn (gen_strset (destptr, dest, value));
17682       emit_label (label);
17683       LABEL_NUSES (label) = 1;
17687       rtx label = ix86_expand_aligntest (count, 4, true);
17688       dest = change_address (destmem, SImode, destptr);
17689       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17690       emit_label (label);
17691       LABEL_NUSES (label) = 1;
17695       rtx label = ix86_expand_aligntest (count, 2, true);
17696       dest = change_address (destmem, HImode, destptr);
17697       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17698       emit_label (label);
17699       LABEL_NUSES (label) = 1;
17703       rtx label = ix86_expand_aligntest (count, 1, true);
17704       dest = change_address (destmem, QImode, destptr);
17705       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17706       emit_label (label);
17707       LABEL_NUSES (label) = 1;
17711 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17712 DESIRED_ALIGNMENT. */
/* Copy enough bytes from SRC to DEST to raise DEST's alignment from
   ALIGN to DESIRED_ALIGNMENT: for each power of two below the goal, test
   the destination pointer's bit, conditionally move one piece, and
   decrement COUNT accordingly.  */
17714 expand_movmem_prologue (rtx destmem, rtx srcmem,
17715			rtx destptr, rtx srcptr, rtx count,
17716			int align, int desired_alignment)
17718   if (align <= 1 && desired_alignment > 1)
17720       rtx label = ix86_expand_aligntest (destptr, 1, false);
17721       srcmem = change_address (srcmem, QImode, srcptr);
17722       destmem = change_address (destmem, QImode, destptr);
17723       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17724       ix86_adjust_counter (count, 1);
17725       emit_label (label);
17726       LABEL_NUSES (label) = 1;
17728   if (align <= 2 && desired_alignment > 2)
17730       rtx label = ix86_expand_aligntest (destptr, 2, false);
17731       srcmem = change_address (srcmem, HImode, srcptr);
17732       destmem = change_address (destmem, HImode, destptr);
17733       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17734       ix86_adjust_counter (count, 2);
17735       emit_label (label);
17736       LABEL_NUSES (label) = 1;
17738   if (align <= 4 && desired_alignment > 4)
17740       rtx label = ix86_expand_aligntest (destptr, 4, false);
17741       srcmem = change_address (srcmem, SImode, srcptr);
17742       destmem = change_address (destmem, SImode, destptr);
17743       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17744       ix86_adjust_counter (count, 4);
17745       emit_label (label);
17746       LABEL_NUSES (label) = 1;
  /* Alignments beyond 8 are not handled by this prologue.  */
17748   gcc_assert (desired_alignment <= 8);
17751 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17752 ALIGN_BYTES is how many bytes need to be copied.  Because ALIGN_BYTES
   is a compile-time constant, the copies are emitted unconditionally
   (no runtime alignment tests) and alias/alignment info on the MEMs is
   kept accurate via set_mem_align/set_mem_size.  */
17754 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17755 int desired_align, int align_bytes)
17758 rtx src_size, dst_size;
/* Offset of SRC from DESIRED_ALIGN alignment, or negative if unknown.  */
17760 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17761 if (src_align_bytes >= 0)
17762 src_align_bytes = desired_align - src_align_bytes;
17763 src_size = MEM_SIZE (src);
17764 dst_size = MEM_SIZE (dst);
/* Copy 1, then 2, then 4 bytes according to the bits of ALIGN_BYTES.  */
17765 if (align_bytes & 1)
17767 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17768 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17770 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17772 if (align_bytes & 2)
17774 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17775 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17776 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17777 set_mem_align (dst, 2 * BITS_PER_UNIT);
/* SRC is 2-byte aligned here only if its misalignment matches DST's.  */
17778 if (src_align_bytes >= 0
17779 && (src_align_bytes & 1) == (align_bytes & 1)
17780 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17781 set_mem_align (src, 2 * BITS_PER_UNIT);
17783 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17785 if (align_bytes & 4)
17787 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17788 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17789 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17790 set_mem_align (dst, 4 * BITS_PER_UNIT);
17791 if (src_align_bytes >= 0)
17793 unsigned int src_align = 0;
17794 if ((src_align_bytes & 3) == (align_bytes & 3))
17796 else if ((src_align_bytes & 1) == (align_bytes & 1))
17798 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17799 set_mem_align (src, src_align * BITS_PER_UNIT);
17802 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Rewrite both MEMs as BLKmode for the main copying loop and record
   the alignment now guaranteed on DST (and deduced on SRC).  */
17804 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17805 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17806 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17807 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17808 if (src_align_bytes >= 0)
17810 unsigned int src_align = 0;
17811 if ((src_align_bytes & 7) == (align_bytes & 7))
17813 else if ((src_align_bytes & 3) == (align_bytes & 3))
17815 else if ((src_align_bytes & 1) == (align_bytes & 1))
17817 if (src_align > (unsigned int) desired_align)
17818 src_align = desired_align;
17819 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17820 set_mem_align (src, src_align * BITS_PER_UNIT);
/* Shrink the recorded sizes by the bytes already copied.  The second
   call must update SRC from SRC_SIZE; the original wrote DST here,
   clobbering DST's size with SRC's and leaving SRC's size stale.  */
17823 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17825 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17830 /* Store enough bytes of VALUE into DESTMEM so that DESTPTR, known to be
17831 aligned to ALIGN, becomes aligned to DESIRED_ALIGNMENT.  COUNT is
   reduced by the bytes stored.  Mirrors expand_movmem_prologue, but uses
   gen_strset with a low-part of VALUE instead of gen_strmov.  */
17833 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17834 int align, int desired_alignment)
/* Store one byte if the destination may be odd.  */
17836 if (align <= 1 && desired_alignment > 1)
17838 rtx label = ix86_expand_aligntest (destptr, 1, false);
17839 destmem = change_address (destmem, QImode, destptr);
17840 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17841 ix86_adjust_counter (count, 1);
17842 emit_label (label);
17843 LABEL_NUSES (label) = 1;
/* Store one halfword if the destination may be 2-byte misaligned.  */
17845 if (align <= 2 && desired_alignment > 2)
17847 rtx label = ix86_expand_aligntest (destptr, 2, false);
17848 destmem = change_address (destmem, HImode, destptr);
17849 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17850 ix86_adjust_counter (count, 2);
17851 emit_label (label);
17852 LABEL_NUSES (label) = 1;
/* Store one word if the destination may be 4-byte misaligned.  */
17854 if (align <= 4 && desired_alignment > 4)
17856 rtx label = ix86_expand_aligntest (destptr, 4, false);
17857 destmem = change_address (destmem, SImode, destptr);
17858 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17859 ix86_adjust_counter (count, 4);
17860 emit_label (label);
17861 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the 1/2/4-byte steps above.  */
17863 gcc_assert (desired_alignment <= 8);
17866 /* Store enough of VALUE into DST so that DST becomes aligned to
17867 DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.
   ALIGN_BYTES is a compile-time constant, so stores are unconditional
   and alias/alignment info on DST is maintained accurately.  */
17869 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17870 int desired_align, int align_bytes)
17873 rtx dst_size = MEM_SIZE (dst);
/* Store 1, then 2, then 4 bytes according to the bits of ALIGN_BYTES.  */
17874 if (align_bytes & 1)
17876 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17878 emit_insn (gen_strset (destreg, dst,
17879 gen_lowpart (QImode, value)));
17881 if (align_bytes & 2)
17883 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17884 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17885 set_mem_align (dst, 2 * BITS_PER_UNIT);
17887 emit_insn (gen_strset (destreg, dst,
17888 gen_lowpart (HImode, value)));
17890 if (align_bytes & 4)
17892 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17893 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17894 set_mem_align (dst, 4 * BITS_PER_UNIT);
17896 emit_insn (gen_strset (destreg, dst,
17897 gen_lowpart (SImode, value)));
/* Rewrite DST as BLKmode for the main loop, record the alignment now
   guaranteed, and shrink the recorded size by the bytes stored.  */
17899 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17900 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17901 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17903 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17907 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   MEMSET selects between the memset and memcpy cost tables.  On return
   *DYNAMIC_CHECK is -1, or a block-size threshold above which a runtime
   check should dispatch to a library call.  */
17908 static enum stringop_alg
17909 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17910 int *dynamic_check)
17912 const struct stringop_algs * algs;
17913 bool optimize_for_speed;
17914 /* Algorithms using the rep prefix want at least edi and ecx;
17915 additionally, memset wants eax and memcpy wants esi. Don't
17916 consider such algorithms if the user has appropriated those
17917 registers for their own purposes. */
17918 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17920 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17922 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17923 || (alg != rep_prefix_1_byte \
17924 && alg != rep_prefix_4_byte \
17925 && alg != rep_prefix_8_byte))
17926 const struct processor_costs *cost;
17928 /* Even if the string operation call is cold, we still might spend a lot
17929 of time processing large blocks. */
17930 if (optimize_function_for_size_p (cfun)
17931 || (optimize_insn_for_size_p ()
17932 && expected_size != -1 && expected_size < 256))
17933 optimize_for_speed = false;
17935 optimize_for_speed = true;
17937 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17939 *dynamic_check = -1;
17941 algs = &cost->memset[TARGET_64BIT != 0];
17943 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy overrides all heuristics.  */
17944 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17945 return stringop_alg;
17946 /* rep; movq or rep; movl is the smallest variant. */
17947 else if (!optimize_for_speed)
17949 if (!count || (count & 3))
17950 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17952 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17954 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17956 else if (expected_size != -1 && expected_size < 4)
17957 return loop_1_byte;
17958 else if (expected_size != -1)
17961 enum stringop_alg alg = libcall;
/* Walk the per-size table; MAX_STRINGOP_ALGS is the table dimension
   declared in i386.h (the original "NAX_STRINGOP_ALGS" was a typo and
   does not exist).  */
17962 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17964 /* We get here if the algorithms that were not libcall-based
17965 were rep-prefix based and we are unable to use rep prefixes
17966 based on global register usage. Break out of the loop and
17967 use the heuristic below. */
17968 if (algs->size[i].max == 0)
17970 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17972 enum stringop_alg candidate = algs->size[i].alg;
17974 if (candidate != libcall && ALG_USABLE_P (candidate))
17976 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17977 last non-libcall inline algorithm. */
17978 if (TARGET_INLINE_ALL_STRINGOPS)
17980 /* When the current size is best to be copied by a libcall,
17981 but we are still forced to inline, run the heuristic below
17982 that will pick code for medium sized blocks. */
17983 if (alg != libcall)
17987 else if (ALG_USABLE_P (candidate))
17991 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17993 /* When asked to inline the call anyway, try to pick meaningful choice.
17994 We look for maximal size of block that is faster to copy by hand and
17995 take blocks of at most of that size guessing that average size will
17996 be roughly half of the block.
17998 If this turns out to be bad, we might simply specify the preferred
17999 choice in ix86_costs. */
18000 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18001 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18004 enum stringop_alg alg;
18006 bool any_alg_usable_p = true;
/* Find MAX = largest table size with a usable non-libcall algorithm.  */
18008 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18010 enum stringop_alg candidate = algs->size[i].alg;
18011 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18013 if (candidate != libcall && candidate
18014 && ALG_USABLE_P (candidate))
18015 max = algs->size[i].max;
18017 /* If there aren't any usable algorithms, then recursing on
18018 smaller sizes isn't going to find anything. Just return the
18019 simple byte-at-a-time copy loop. */
18020 if (!any_alg_usable_p)
18022 /* Pick something reasonable. */
18023 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18024 *dynamic_check = 128;
18025 return loop_1_byte;
/* Recurse with EXPECTED_SIZE = MAX/2, guessing the average block is
   half the largest profitable size.  */
18029 alg = decide_alg (count, max / 2, memset, dynamic_check);
18030 gcc_assert (*dynamic_check == -1);
18031 gcc_assert (alg != libcall);
18032 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18033 *dynamic_check = max;
18036 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18037 #undef ALG_USABLE_P
18040 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18041 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).
   Returns the alignment the prologue should establish for algorithm ALG,
   never less than ALIGN; for very small EXPECTED_SIZE no extra alignment
   is requested since the prologue cost would not pay off.  */
18043 decide_alignment (int align,
18044 enum stringop_alg alg,
18047 int desired_align = 0;
18051 gcc_unreachable ();
18053 case unrolled_loop:
/* Word-sized chunks want word alignment.  */
18054 desired_align = GET_MODE_SIZE (Pmode);
18056 case rep_prefix_8_byte:
18059 case rep_prefix_4_byte:
18060 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18061 copying whole cacheline at once. */
18062 if (TARGET_PENTIUMPRO)
18067 case rep_prefix_1_byte:
18068 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18069 copying whole cacheline at once. */
18070 if (TARGET_PENTIUMPRO)
/* Never return less than the known alignment; for tiny blocks skip
   alignment work entirely.  */
18084 if (desired_align < align)
18085 desired_align = align;
18086 if (expected_size != -1 && expected_size < 4)
18087 desired_align = align;
18088 return desired_align;
18091 /* Return the smallest power of 2 that is >= VAL (used to size the
   epilogue's COUNT & (N-1) masking; see callers below).  */
18093 smallest_pow2_greater_than (int val)
18101 /* Expand string move (memcpy) operation. Use i386 string operations when
18102 profitable. expand_setmem contains similar code. The code depends upon
18103 architecture, block size and alignment, but always has the same
   overall structure:
18106 1) Prologue guard: Conditional that jumps up to epilogues for small
18107 blocks that can be handled by epilogue alone. This is faster but
18108 also needed for correctness, since prologue assume the block is larger
18109 than the desired alignment.
18111 Optional dynamic check for size and libcall for large
18112 blocks is emitted here too, with -minline-stringops-dynamically.
18114 2) Prologue: copy first few bytes in order to get destination aligned
18115 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18116 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18117 We emit either a jump tree on power of two sized blocks, or a byte loop.
18119 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18120 with specified algorithm.
18122 4) Epilogue: code copying tail of the block that is too small to be
18123 handled by main body (or up to size guarded by prologue guard). */
18126 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18127 rtx expected_align_exp, rtx expected_size_exp)
18133 rtx jump_around_label = NULL;
18134 HOST_WIDE_INT align = 1;
18135 unsigned HOST_WIDE_INT count = 0;
18136 HOST_WIDE_INT expected_size = -1;
18137 int size_needed = 0, epilogue_size_needed;
18138 int desired_align = 0, align_bytes = 0;
18139 enum stringop_alg alg;
18141 bool need_zero_guard = false;
18143 if (CONST_INT_P (align_exp))
18144 align = INTVAL (align_exp);
18145 /* i386 can do misaligned access on reasonably increased cost. */
18146 if (CONST_INT_P (expected_align_exp)
18147 && INTVAL (expected_align_exp) > align)
18148 align = INTVAL (expected_align_exp);
18149 /* ALIGN is the minimum of destination and source alignment, but we care here
18150 just about destination alignment. */
18151 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18152 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
/* COUNT stays 0 when the byte count is not a compile-time constant.  */
18154 if (CONST_INT_P (count_exp))
18155 count = expected_size = INTVAL (count_exp);
18156 if (CONST_INT_P (expected_size_exp) && count == 0)
18157 expected_size = INTVAL (expected_size_exp);
18159 /* Make sure we don't need to care about overflow later on. */
18160 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18163 /* Step 0: Decide on preferred algorithm, desired alignment and
18164 size of chunks to be copied by main loop. */
18166 alg = decide_alg (count, expected_size, false, &dynamic_check);
18167 desired_align = decide_alignment (align, alg, expected_size);
18169 if (!TARGET_ALIGN_STRINGOPS)
18170 align = desired_align;
18172 if (alg == libcall)
18174 gcc_assert (alg != no_stringop);
18176 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18177 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18178 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = chunk size of the main loop; NEED_ZERO_GUARD is set for
   loop algorithms that must not execute with a zero remaining count.  */
18183 gcc_unreachable ();
18185 need_zero_guard = true;
18186 size_needed = GET_MODE_SIZE (Pmode);
18188 case unrolled_loop:
18189 need_zero_guard = true;
18190 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18192 case rep_prefix_8_byte:
18195 case rep_prefix_4_byte:
18198 case rep_prefix_1_byte:
18202 need_zero_guard = true;
18207 epilogue_size_needed = size_needed;
18209 /* Step 1: Prologue guard. */
18211 /* Alignment code needs count to be in register. */
18212 if (CONST_INT_P (count_exp) && desired_align > align)
18214 if (INTVAL (count_exp) > desired_align
18215 && INTVAL (count_exp) > size_needed)
18218 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18219 if (align_bytes <= 0)
18222 align_bytes = desired_align - align_bytes;
18224 if (align_bytes == 0)
18225 count_exp = force_reg (counter_mode (count_exp), count_exp);
18227 gcc_assert (desired_align >= 1 && align >= 1);
18229 /* Ensure that alignment prologue won't copy past end of block. */
18230 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18232 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18233 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18234 Make sure it is power of 2. */
18235 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18239 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18241 /* If main algorithm works on QImode, no epilogue is needed.
18242 For small sizes just don't align anything. */
18243 if (size_needed == 1)
18244 desired_align = align;
/* Runtime guard: jump to the epilogue when the block is smaller than
   EPILOGUE_SIZE_NEEDED, with a branch-probability hint.  */
18251 label = gen_label_rtx ();
18252 emit_cmp_and_jump_insns (count_exp,
18253 GEN_INT (epilogue_size_needed),
18254 LTU, 0, counter_mode (count_exp), 1, label);
18255 if (expected_size == -1 || expected_size < epilogue_size_needed)
18256 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18258 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18262 /* Emit code to decide on runtime whether library call or inline should be
   used (-minline-stringops-dynamically).  */
18264 if (dynamic_check != -1)
18266 if (CONST_INT_P (count_exp))
18268 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18270 emit_block_move_via_libcall (dst, src, count_exp, false);
18271 count_exp = const0_rtx;
18277 rtx hot_label = gen_label_rtx ();
18278 jump_around_label = gen_label_rtx ();
18279 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18280 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18281 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18282 emit_block_move_via_libcall (dst, src, count_exp, false);
18283 emit_jump (jump_around_label);
18284 emit_label (hot_label);
18288 /* Step 2: Alignment prologue. */
18290 if (desired_align > align)
18292 if (align_bytes == 0)
18294 /* Except for the first move in epilogue, we no longer know
18295 constant offset in aliasing info. It don't seems to worth
18296 the pain to maintain it for the first move, so throw away
18298 src = change_address (src, BLKmode, srcreg);
18299 dst = change_address (dst, BLKmode, destreg);
18300 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18305 /* If we know how many bytes need to be stored before dst is
18306 sufficiently aligned, maintain aliasing info accurately. */
18307 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18308 desired_align, align_bytes);
18309 count_exp = plus_constant (count_exp, -align_bytes);
18310 count -= align_bytes;
18312 if (need_zero_guard
18313 && (count < (unsigned HOST_WIDE_INT) size_needed
18314 || (align_bytes == 0
18315 && count < ((unsigned HOST_WIDE_INT) size_needed
18316 + desired_align - align))))
18318 /* It is possible that we copied enough so the main loop will not
   execute; emit a guard so it is skipped when nothing remains.  */
18320 gcc_assert (size_needed > 1);
18321 if (label == NULL_RTX)
18322 label = gen_label_rtx ();
18323 emit_cmp_and_jump_insns (count_exp,
18324 GEN_INT (size_needed),
18325 LTU, 0, counter_mode (count_exp), 1, label);
18326 if (expected_size == -1
18327 || expected_size < (desired_align - align) / 2 + size_needed)
18328 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18330 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18333 if (label && size_needed == 1)
18335 emit_label (label);
18336 LABEL_NUSES (label) = 1;
18338 epilogue_size_needed = 1;
18340 else if (label == NULL_RTX)
18341 epilogue_size_needed = size_needed;
18343 /* Step 3: Main loop. */
18349 gcc_unreachable ();
18351 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18352 count_exp, QImode, 1, expected_size);
18355 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18356 count_exp, Pmode, 1, expected_size);
18358 case unrolled_loop:
18359 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18360 registers for 4 temporaries anyway. */
18361 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18362 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18365 case rep_prefix_8_byte:
18366 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18369 case rep_prefix_4_byte:
18370 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18373 case rep_prefix_1_byte:
18374 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18378 /* Adjust properly the offset of src and dest memory for aliasing. */
18379 if (CONST_INT_P (count_exp))
18381 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18382 (count / size_needed) * size_needed);
18383 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18384 (count / size_needed) * size_needed);
18388 src = change_address (src, BLKmode, srcreg);
18389 dst = change_address (dst, BLKmode, destreg);
18392 /* Step 4: Epilogue to copy the remaining bytes. */
18396 /* When the main loop is done, COUNT_EXP might hold original count,
18397 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18398 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18399 bytes. Compensate if needed. */
18401 if (size_needed < epilogue_size_needed)
18404 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18405 GEN_INT (size_needed - 1), count_exp, 1,
18407 if (tmp != count_exp)
18408 emit_move_insn (count_exp, tmp);
18410 emit_label (label);
18411 LABEL_NUSES (label) = 1;
18414 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18415 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18416 epilogue_size_needed);
18417 if (jump_around_label)
18418 emit_label (jump_around_label);
18422 /* Helper used on the memset expansion path (called via
   promote_duplicated_reg_to_size from ix86_expand_setmem below).
   For QImode value 0xXY produce
18423 0xXYXYXYXY of width specified by MODE. This is essentially
18424 a * 0x01010101 replication, but we can do slightly better than
18425 synth_mult by unwinding the sequence by hand on CPUs with
   fast multiply (the elided condition — TODO confirm).  */
18428 promote_duplicated_reg (enum machine_mode mode, rtx val)
18430 enum machine_mode valmode = GET_MODE (val);
18432 int nops = mode == DImode ? 3 : 2;
18434 gcc_assert (mode == SImode || mode == DImode);
18435 if (val == const0_rtx)
18436 return copy_to_mode_reg (mode, const0_rtx);
/* Constant values are replicated at compile time.  */
18437 if (CONST_INT_P (val))
18439 HOST_WIDE_INT v = INTVAL (val) & 255;
18443 if (mode == DImode)
18444 v |= (v << 16) << 16;
18445 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18448 if (valmode == VOIDmode)
18450 if (valmode != QImode)
18451 val = gen_lowpart (QImode, val);
18452 if (mode == QImode)
/* Cost comparison: multiply by a 0x...01010101 mask versus the
   shift/or replication sequence emitted below.  */
18454 if (!TARGET_PARTIAL_REG_STALL)
18456 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18457 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18458 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18459 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18461 rtx reg = convert_modes (mode, QImode, val, true);
18462 tmp = promote_duplicated_reg (mode, const1_rtx);
18463 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Fallback: replicate by shifting and OR-ing (8, 16, then 32 bits).  */
18468 rtx reg = convert_modes (mode, QImode, val, true);
18470 if (!TARGET_PARTIAL_REG_STALL)
18471 if (mode == SImode)
18472 emit_insn (gen_movsi_insv_1 (reg, reg));
18474 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18477 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18478 NULL, 1, OPTAB_DIRECT);
18480 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18482 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18483 NULL, 1, OPTAB_DIRECT);
18484 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18485 if (mode == SImode)
18487 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18488 NULL, 1, OPTAB_DIRECT);
18489 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18494 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18495 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18496 alignment from ALIGN to DESIRED_ALIGN.  Picks the widest mode any of
   the emitted stores will use, so one promoted register serves all.  */
18498 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18503 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18504 promoted_val = promote_duplicated_reg (DImode, val);
18505 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18506 promoted_val = promote_duplicated_reg (SImode, val);
18507 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18508 promoted_val = promote_duplicated_reg (HImode, val);
/* Byte-sized operations can use VAL unchanged.  */
18510 promoted_val = val;
18512 return promoted_val;
18515 /* Expand string clear operation (bzero). Use i386 string operations when
18516 profitable. See expand_movmem comment for explanation of individual
18517 steps performed.  VAL_EXP is the fill byte; it is promoted (replicated
   across a wider register) so wide stores can be used.  */
18519 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18520 rtx expected_align_exp, rtx expected_size_exp)
18525 rtx jump_around_label = NULL;
18526 HOST_WIDE_INT align = 1;
18527 unsigned HOST_WIDE_INT count = 0;
18528 HOST_WIDE_INT expected_size = -1;
18529 int size_needed = 0, epilogue_size_needed;
18530 int desired_align = 0, align_bytes = 0;
18531 enum stringop_alg alg;
18532 rtx promoted_val = NULL;
/* Set when the promoted value may be unavailable in the epilogue and a
   byte loop must be used instead (see the note below).  */
18533 bool force_loopy_epilogue = false;
18535 bool need_zero_guard = false;
18537 if (CONST_INT_P (align_exp))
18538 align = INTVAL (align_exp);
18539 /* i386 can do misaligned access on reasonably increased cost. */
18540 if (CONST_INT_P (expected_align_exp)
18541 && INTVAL (expected_align_exp) > align)
18542 align = INTVAL (expected_align_exp);
18543 if (CONST_INT_P (count_exp))
18544 count = expected_size = INTVAL (count_exp);
18545 if (CONST_INT_P (expected_size_exp) && count == 0)
18546 expected_size = INTVAL (expected_size_exp);
18548 /* Make sure we don't need to care about overflow later on. */
18549 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18552 /* Step 0: Decide on preferred algorithm, desired alignment and
18553 size of chunks to be copied by main loop. */
18555 alg = decide_alg (count, expected_size, true, &dynamic_check);
18556 desired_align = decide_alignment (align, alg, expected_size);
18558 if (!TARGET_ALIGN_STRINGOPS)
18559 align = desired_align;
18561 if (alg == libcall)
18563 gcc_assert (alg != no_stringop);
18565 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18566 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED = chunk size of the main loop; NEED_ZERO_GUARD is set for
   loop algorithms that must not execute with a zero remaining count.  */
18571 gcc_unreachable ();
18573 need_zero_guard = true;
18574 size_needed = GET_MODE_SIZE (Pmode);
18576 case unrolled_loop:
18577 need_zero_guard = true;
18578 size_needed = GET_MODE_SIZE (Pmode) * 4;
18580 case rep_prefix_8_byte:
18583 case rep_prefix_4_byte:
18586 case rep_prefix_1_byte:
18590 need_zero_guard = true;
18594 epilogue_size_needed = size_needed;
18596 /* Step 1: Prologue guard. */
18598 /* Alignment code needs count to be in register. */
18599 if (CONST_INT_P (count_exp) && desired_align > align)
18601 if (INTVAL (count_exp) > desired_align
18602 && INTVAL (count_exp) > size_needed)
18605 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18606 if (align_bytes <= 0)
18609 align_bytes = desired_align - align_bytes;
18611 if (align_bytes == 0)
/* Use DImode for the counter only when the count can exceed 32 bits.  */
18613 enum machine_mode mode = SImode;
18614 if (TARGET_64BIT && (count & ~0xffffffff))
18616 count_exp = force_reg (mode, count_exp);
18619 /* Do the cheap promotion to allow better CSE across the
18620 main loop and epilogue (ie one load of the big constant in the
18621 front of all code. */
18622 if (CONST_INT_P (val_exp))
18623 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18624 desired_align, align)
18625 /* Ensure that alignment prologue won't copy past end of block. */
18626 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18628 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18629 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18630 Make sure it is power of 2. */
18631 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18633 /* To improve performance of small blocks, we jump around the VAL
18634 promoting mode. This mean that if the promoted VAL is not constant,
18635 we might not use it in the epilogue and have to use byte
   loop variant.  */
18637 if (epilogue_size_needed > 2 && !promoted_val)
18638 force_loopy_epilogue = true;
18641 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18643 /* If main algorithm works on QImode, no epilogue is needed.
18644 For small sizes just don't align anything. */
18645 if (size_needed == 1)
18646 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks smaller than
   EPILOGUE_SIZE_NEEDED, with a branch-probability hint.  */
18653 label = gen_label_rtx ();
18654 emit_cmp_and_jump_insns (count_exp,
18655 GEN_INT (epilogue_size_needed),
18656 LTU, 0, counter_mode (count_exp), 1, label);
18657 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18658 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18660 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check dispatching large blocks to the libcall
   (-minline-stringops-dynamically).  */
18663 if (dynamic_check != -1)
18665 rtx hot_label = gen_label_rtx ();
18666 jump_around_label = gen_label_rtx ();
18667 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18668 LEU, 0, counter_mode (count_exp), 1, hot_label);
18669 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18670 set_storage_via_libcall (dst, count_exp, val_exp, false);
18671 emit_jump (jump_around_label);
18672 emit_label (hot_label);
18675 /* Step 2: Alignment prologue. */
18677 /* Do the expensive promotion once we branched off the small blocks. */
18679 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18680 desired_align, align);
18681 gcc_assert (desired_align >= 1 && align >= 1);
18683 if (desired_align > align)
18685 if (align_bytes == 0)
18687 /* Except for the first move in epilogue, we no longer know
18688 constant offset in aliasing info. It don't seems to worth
18689 the pain to maintain it for the first move, so throw away
18691 dst = change_address (dst, BLKmode, destreg);
18692 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18697 /* If we know how many bytes need to be stored before dst is
18698 sufficiently aligned, maintain aliasing info accurately. */
18699 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18700 desired_align, align_bytes);
18701 count_exp = plus_constant (count_exp, -align_bytes);
18702 count -= align_bytes;
18704 if (need_zero_guard
18705 && (count < (unsigned HOST_WIDE_INT) size_needed
18706 || (align_bytes == 0
18707 && count < ((unsigned HOST_WIDE_INT) size_needed
18708 + desired_align - align))))
18710 /* It is possible that we copied enough so the main loop will not
   execute; emit a guard so it is skipped when nothing remains.  */
18712 gcc_assert (size_needed > 1);
18713 if (label == NULL_RTX)
18714 label = gen_label_rtx ();
18715 emit_cmp_and_jump_insns (count_exp,
18716 GEN_INT (size_needed),
18717 LTU, 0, counter_mode (count_exp), 1, label);
18718 if (expected_size == -1
18719 || expected_size < (desired_align - align) / 2 + size_needed)
18720 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18722 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18725 if (label && size_needed == 1)
18727 emit_label (label);
18728 LABEL_NUSES (label) = 1;
/* Byte stores need no promoted value.  */
18730 promoted_val = val_exp;
18731 epilogue_size_needed = 1;
18733 else if (label == NULL_RTX)
18734 epilogue_size_needed = size_needed;
18736 /* Step 3: Main loop. */
18742 gcc_unreachable ();
18744 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18745 count_exp, QImode, 1, expected_size);
18748 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18749 count_exp, Pmode, 1, expected_size);
18751 case unrolled_loop:
18752 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18753 count_exp, Pmode, 4, expected_size);
18755 case rep_prefix_8_byte:
18756 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18759 case rep_prefix_4_byte:
18760 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18763 case rep_prefix_1_byte:
18764 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18768 /* Adjust properly the offset of src and dest memory for aliasing. */
18769 if (CONST_INT_P (count_exp))
18770 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18771 (count / size_needed) * size_needed);
18773 dst = change_address (dst, BLKmode, destreg);
18775 /* Step 4: Epilogue to copy the remaining bytes. */
18779 /* When the main loop is done, COUNT_EXP might hold original count,
18780 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18781 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18782 bytes. Compensate if needed. */
18784 if (size_needed < epilogue_size_needed)
18787 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18788 GEN_INT (size_needed - 1), count_exp, 1,
18790 if (tmp != count_exp)
18791 emit_move_insn (count_exp, tmp);
18793 emit_label (label);
18794 LABEL_NUSES (label) = 1;
18797 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Without a promoted value the epilogue must fall back to a byte loop.  */
18799 if (force_loopy_epilogue)
18800 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18801 epilogue_size_needed);
18803 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18804 epilogue_size_needed);
18806 if (jump_around_label)
18807 emit_label (jump_around_label);
18811 /* Expand the appropriate insns for doing strlen if not just doing
18814 out = result, initialized with the start address
18815 align_rtx = alignment of the address.
18816 scratch = scratch register, initialized with the startaddress when
18817 not aligned, otherwise undefined
18819 This is just the body. It needs the initializations mentioned above and
18820 some address computing at the end. These things are done in i386.md. */
18823 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18827 rtx align_2_label = NULL_RTX;
18828 rtx align_3_label = NULL_RTX;
18829 rtx align_4_label = gen_label_rtx ();
18830 rtx end_0_label = gen_label_rtx ();
18832 rtx tmpreg = gen_reg_rtx (SImode);
18833 rtx scratch = gen_reg_rtx (SImode);
18837 if (CONST_INT_P (align_rtx))
18838 align = INTVAL (align_rtx);
18840 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18842 /* Is there a known alignment and is it less than 4? */
18845 rtx scratch1 = gen_reg_rtx (Pmode);
18846 emit_move_insn (scratch1, out);
18847 /* Is there a known alignment and is it not 2? */
18850 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18851 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18853 /* Leave just the 3 lower bits. */
18854 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18855 NULL_RTX, 0, OPTAB_WIDEN);
18857 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18858 Pmode, 1, align_4_label);
18859 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18860 Pmode, 1, align_2_label);
18861 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18862 Pmode, 1, align_3_label);
18866 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18867 check if is aligned to 4 - byte. */
18869 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18870 NULL_RTX, 0, OPTAB_WIDEN);
18872 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18873 Pmode, 1, align_4_label);
18876 mem = change_address (src, QImode, out);
18878 /* Now compare the bytes. */
18880 /* Compare the first n unaligned byte on a byte per byte basis. */
18881 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18882 QImode, 1, end_0_label);
18884 /* Increment the address. */
18885 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18887 /* Not needed with an alignment of 2 */
18890 emit_label (align_2_label);
18892 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18895 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18897 emit_label (align_3_label);
18900 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18903 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18906 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18907 align this loop. It gives only huge programs, but does not help to
18909 emit_label (align_4_label);
18911 mem = change_address (src, SImode, out);
18912 emit_move_insn (scratch, mem);
18913 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18915 /* This formula yields a nonzero result iff one of the bytes is zero.
18916 This saves three branches inside loop and many cycles. */
18918 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18919 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18920 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18921 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18922 gen_int_mode (0x80808080, SImode)));
18923 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18928 rtx reg = gen_reg_rtx (SImode);
18929 rtx reg2 = gen_reg_rtx (Pmode);
18930 emit_move_insn (reg, tmpreg);
18931 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18933 /* If zero is not in the first two bytes, move two bytes forward. */
18934 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18935 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18936 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18937 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18938 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18941 /* Emit lea manually to avoid clobbering of flags. */
18942 emit_insn (gen_rtx_SET (SImode, reg2,
18943 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18945 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18946 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18947 emit_insn (gen_rtx_SET (VOIDmode, out,
18948 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18955 rtx end_2_label = gen_label_rtx ();
18956 /* Is zero in the first two bytes? */
18958 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18959 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18960 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18961 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18962 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18964 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18965 JUMP_LABEL (tmp) = end_2_label;
18967 /* Not in the first two. Move two bytes forward. */
18968 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18969 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18971 emit_label (end_2_label);
18975 /* Avoid branch in fixing the byte. */
18976 tmpreg = gen_lowpart (QImode, tmpreg);
18977 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18978 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18979 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18981 emit_label (end_0_label);
18984 /* Expand strlen. */
18987 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18989 rtx addr, scratch1, scratch2, scratch3, scratch4;
18991 /* The generic case of strlen expander is long. Avoid it's
18992 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
18994 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18995 && !TARGET_INLINE_ALL_STRINGOPS
18996 && !optimize_insn_for_size_p ()
18997 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19000 addr = force_reg (Pmode, XEXP (src, 0));
19001 scratch1 = gen_reg_rtx (Pmode);
19003 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19004 && !optimize_insn_for_size_p ())
19006 /* Well it seems that some optimizer does not combine a call like
19007 foo(strlen(bar), strlen(bar));
19008 when the move and the subtraction is done here. It does calculate
19009 the length just once when these instructions are done inside of
19010 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19011 often used and I use one fewer register for the lifetime of
19012 output_strlen_unroll() this is better. */
19014 emit_move_insn (out, addr);
19016 ix86_expand_strlensi_unroll_1 (out, src, align);
19018 /* strlensi_unroll_1 returns the address of the zero at the end of
19019 the string, like memchr(), so compute the length by subtracting
19020 the start address. */
19021 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19027 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19028 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19031 scratch2 = gen_reg_rtx (Pmode);
19032 scratch3 = gen_reg_rtx (Pmode);
19033 scratch4 = force_reg (Pmode, constm1_rtx);
19035 emit_move_insn (scratch3, addr);
19036 eoschar = force_reg (QImode, eoschar);
19038 src = replace_equiv_address_nv (src, scratch3);
19040 /* If .md starts supporting :P, this can be done in .md. */
19041 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19042 scratch4), UNSPEC_SCAS);
19043 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19044 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19045 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19050 /* For given symbol (function) construct code to compute address of it's PLT
19051 entry in large x86-64 PIC model. */
19053 construct_plt_address (rtx symbol)
19055 rtx tmp = gen_reg_rtx (Pmode);
19056 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19058 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19059 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19061 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19062 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19067 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19069 rtx pop, int sibcall)
19071 rtx use = NULL, call;
19073 if (pop == const0_rtx)
19075 gcc_assert (!TARGET_64BIT || !pop);
19077 if (TARGET_MACHO && !TARGET_64BIT)
19080 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19081 fnaddr = machopic_indirect_call_target (fnaddr);
19086 /* Static functions and indirect calls don't need the pic register. */
19087 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19088 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19089 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19090 use_reg (&use, pic_offset_table_rtx);
19093 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19095 rtx al = gen_rtx_REG (QImode, AX_REG);
19096 emit_move_insn (al, callarg2);
19097 use_reg (&use, al);
19100 if (ix86_cmodel == CM_LARGE_PIC
19101 && GET_CODE (fnaddr) == MEM
19102 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19103 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19104 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19105 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19107 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19108 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19110 if (sibcall && TARGET_64BIT
19111 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19114 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19115 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19116 emit_move_insn (fnaddr, addr);
19117 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19120 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19122 call = gen_rtx_SET (VOIDmode, retval, call);
19125 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19126 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19127 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19130 && ix86_cfun_abi () == MS_ABI
19131 && (!callarg2 || INTVAL (callarg2) != -2))
19133 /* We need to represent that SI and DI registers are clobbered
19135 static int clobbered_registers[] = {
19136 XMM6_REG, XMM7_REG, XMM8_REG,
19137 XMM9_REG, XMM10_REG, XMM11_REG,
19138 XMM12_REG, XMM13_REG, XMM14_REG,
19139 XMM15_REG, SI_REG, DI_REG
19142 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19143 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19144 UNSPEC_MS_TO_SYSV_CALL);
19148 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19149 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19152 (SSE_REGNO_P (clobbered_registers[i])
19154 clobbered_registers[i]));
19156 call = gen_rtx_PARALLEL (VOIDmode,
19157 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19161 call = emit_call_insn (call);
19163 CALL_INSN_FUNCTION_USAGE (call) = use;
19167 /* Clear stack slot assignments remembered from previous functions.
19168 This is called from INIT_EXPANDERS once before RTL is emitted for each
19171 static struct machine_function *
19172 ix86_init_machine_status (void)
19174 struct machine_function *f;
19176 f = GGC_CNEW (struct machine_function);
19177 f->use_fast_prologue_epilogue_nregs = -1;
19178 f->tls_descriptor_call_expanded_p = 0;
19179 f->call_abi = ix86_abi;
19184 /* Return a MEM corresponding to a stack slot with mode MODE.
19185 Allocate a new slot if necessary.
19187 The RTL for a function can have several slots available: N is
19188 which slot to use. */
19191 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19193 struct stack_local_entry *s;
19195 gcc_assert (n < MAX_386_STACK_LOCALS);
19197 /* Virtual slot is valid only before vregs are instantiated. */
19198 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19200 for (s = ix86_stack_locals; s; s = s->next)
19201 if (s->mode == mode && s->n == n)
19202 return copy_rtx (s->rtl);
19204 s = (struct stack_local_entry *)
19205 ggc_alloc (sizeof (struct stack_local_entry));
19208 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19210 s->next = ix86_stack_locals;
19211 ix86_stack_locals = s;
19215 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19217 static GTY(()) rtx ix86_tls_symbol;
19219 ix86_tls_get_addr (void)
19222 if (!ix86_tls_symbol)
19224 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19225 (TARGET_ANY_GNU_TLS
19227 ? "___tls_get_addr"
19228 : "__tls_get_addr");
19231 return ix86_tls_symbol;
19234 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19236 static GTY(()) rtx ix86_tls_module_base_symbol;
19238 ix86_tls_module_base (void)
19241 if (!ix86_tls_module_base_symbol)
19243 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19244 "_TLS_MODULE_BASE_");
19245 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19246 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19249 return ix86_tls_module_base_symbol;
19252 /* Calculate the length of the memory address in the instruction
19253 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19256 memory_address_length (rtx addr)
19258 struct ix86_address parts;
19259 rtx base, index, disp;
19263 if (GET_CODE (addr) == PRE_DEC
19264 || GET_CODE (addr) == POST_INC
19265 || GET_CODE (addr) == PRE_MODIFY
19266 || GET_CODE (addr) == POST_MODIFY)
19269 ok = ix86_decompose_address (addr, &parts);
19272 if (parts.base && GET_CODE (parts.base) == SUBREG)
19273 parts.base = SUBREG_REG (parts.base);
19274 if (parts.index && GET_CODE (parts.index) == SUBREG)
19275 parts.index = SUBREG_REG (parts.index);
19278 index = parts.index;
19283 - esp as the base always wants an index,
19284 - ebp as the base always wants a displacement. */
19286 /* Register Indirect. */
19287 if (base && !index && !disp)
19289 /* esp (for its index) and ebp (for its displacement) need
19290 the two-byte modrm form. */
19291 if (addr == stack_pointer_rtx
19292 || addr == arg_pointer_rtx
19293 || addr == frame_pointer_rtx
19294 || addr == hard_frame_pointer_rtx)
19298 /* Direct Addressing. */
19299 else if (disp && !base && !index)
19304 /* Find the length of the displacement constant. */
19307 if (base && satisfies_constraint_K (disp))
19312 /* ebp always wants a displacement. */
19313 else if (base == hard_frame_pointer_rtx)
19316 /* An index requires the two-byte modrm form.... */
19318 /* ...like esp, which always wants an index. */
19319 || base == stack_pointer_rtx
19320 || base == arg_pointer_rtx
19321 || base == frame_pointer_rtx)
19328 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19329 is set, expect that insn have 8bit immediate alternative. */
19331 ix86_attr_length_immediate_default (rtx insn, int shortform)
19335 extract_insn_cached (insn);
19336 for (i = recog_data.n_operands - 1; i >= 0; --i)
19337 if (CONSTANT_P (recog_data.operand[i]))
19340 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19344 switch (get_attr_mode (insn))
19355 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19360 fatal_insn ("unknown insn mode", insn);
19366 /* Compute default value for "length_address" attribute. */
19368 ix86_attr_length_address_default (rtx insn)
19372 if (get_attr_type (insn) == TYPE_LEA)
19374 rtx set = PATTERN (insn);
19376 if (GET_CODE (set) == PARALLEL)
19377 set = XVECEXP (set, 0, 0);
19379 gcc_assert (GET_CODE (set) == SET);
19381 return memory_address_length (SET_SRC (set));
19384 extract_insn_cached (insn);
19385 for (i = recog_data.n_operands - 1; i >= 0; --i)
19386 if (MEM_P (recog_data.operand[i]))
19388 return memory_address_length (XEXP (recog_data.operand[i], 0));
19394 /* Compute default value for "length_vex" attribute. It includes
19395 2 or 3 byte VEX prefix and 1 opcode byte. */
19398 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19403 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19404 byte VEX prefix. */
19405 if (!has_0f_opcode || has_vex_w)
19408 /* We can always use 2 byte VEX prefix in 32bit. */
19412 extract_insn_cached (insn);
19414 for (i = recog_data.n_operands - 1; i >= 0; --i)
19415 if (REG_P (recog_data.operand[i]))
19417 /* REX.W bit uses 3 byte VEX prefix. */
19418 if (GET_MODE (recog_data.operand[i]) == DImode)
19423 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19424 if (MEM_P (recog_data.operand[i])
19425 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19432 /* Return the maximum number of instructions a cpu can issue. */
19435 ix86_issue_rate (void)
19439 case PROCESSOR_PENTIUM:
19440 case PROCESSOR_ATOM:
19444 case PROCESSOR_PENTIUMPRO:
19445 case PROCESSOR_PENTIUM4:
19446 case PROCESSOR_ATHLON:
19448 case PROCESSOR_AMDFAM10:
19449 case PROCESSOR_NOCONA:
19450 case PROCESSOR_GENERIC32:
19451 case PROCESSOR_GENERIC64:
19454 case PROCESSOR_CORE2:
19462 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19463 by DEP_INSN and nothing set by DEP_INSN. */
19466 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19470 /* Simplify the test for uninteresting insns. */
19471 if (insn_type != TYPE_SETCC
19472 && insn_type != TYPE_ICMOV
19473 && insn_type != TYPE_FCMOV
19474 && insn_type != TYPE_IBR)
19477 if ((set = single_set (dep_insn)) != 0)
19479 set = SET_DEST (set);
19482 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19483 && XVECLEN (PATTERN (dep_insn), 0) == 2
19484 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19485 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19487 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19488 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19493 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19496 /* This test is true if the dependent insn reads the flags but
19497 not any other potentially set register. */
19498 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19501 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19507 /* Return true iff USE_INSN has a memory address with operands set by
19511 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19514 extract_insn_cached (use_insn);
19515 for (i = recog_data.n_operands - 1; i >= 0; --i)
19516 if (MEM_P (recog_data.operand[i]))
19518 rtx addr = XEXP (recog_data.operand[i], 0);
19519 return modified_in_p (addr, set_insn) != 0;
19525 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19527 enum attr_type insn_type, dep_insn_type;
19528 enum attr_memory memory;
19530 int dep_insn_code_number;
19532 /* Anti and output dependencies have zero cost on all CPUs. */
19533 if (REG_NOTE_KIND (link) != 0)
19536 dep_insn_code_number = recog_memoized (dep_insn);
19538 /* If we can't recognize the insns, we can't really do anything. */
19539 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19542 insn_type = get_attr_type (insn);
19543 dep_insn_type = get_attr_type (dep_insn);
19547 case PROCESSOR_PENTIUM:
19548 /* Address Generation Interlock adds a cycle of latency. */
19549 if (insn_type == TYPE_LEA)
19551 rtx addr = PATTERN (insn);
19553 if (GET_CODE (addr) == PARALLEL)
19554 addr = XVECEXP (addr, 0, 0);
19556 gcc_assert (GET_CODE (addr) == SET);
19558 addr = SET_SRC (addr);
19559 if (modified_in_p (addr, dep_insn))
19562 else if (ix86_agi_dependent (dep_insn, insn))
19565 /* ??? Compares pair with jump/setcc. */
19566 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19569 /* Floating point stores require value to be ready one cycle earlier. */
19570 if (insn_type == TYPE_FMOV
19571 && get_attr_memory (insn) == MEMORY_STORE
19572 && !ix86_agi_dependent (dep_insn, insn))
19576 case PROCESSOR_PENTIUMPRO:
19577 memory = get_attr_memory (insn);
19579 /* INT->FP conversion is expensive. */
19580 if (get_attr_fp_int_src (dep_insn))
19583 /* There is one cycle extra latency between an FP op and a store. */
19584 if (insn_type == TYPE_FMOV
19585 && (set = single_set (dep_insn)) != NULL_RTX
19586 && (set2 = single_set (insn)) != NULL_RTX
19587 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19588 && MEM_P (SET_DEST (set2)))
19591 /* Show ability of reorder buffer to hide latency of load by executing
19592 in parallel with previous instruction in case
19593 previous instruction is not needed to compute the address. */
19594 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19595 && !ix86_agi_dependent (dep_insn, insn))
19597 /* Claim moves to take one cycle, as core can issue one load
19598 at time and the next load can start cycle later. */
19599 if (dep_insn_type == TYPE_IMOV
19600 || dep_insn_type == TYPE_FMOV)
19608 memory = get_attr_memory (insn);
19610 /* The esp dependency is resolved before the instruction is really
19612 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19613 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19616 /* INT->FP conversion is expensive. */
19617 if (get_attr_fp_int_src (dep_insn))
19620 /* Show ability of reorder buffer to hide latency of load by executing
19621 in parallel with previous instruction in case
19622 previous instruction is not needed to compute the address. */
19623 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19624 && !ix86_agi_dependent (dep_insn, insn))
19626 /* Claim moves to take one cycle, as core can issue one load
19627 at time and the next load can start cycle later. */
19628 if (dep_insn_type == TYPE_IMOV
19629 || dep_insn_type == TYPE_FMOV)
19638 case PROCESSOR_ATHLON:
19640 case PROCESSOR_AMDFAM10:
19641 case PROCESSOR_ATOM:
19642 case PROCESSOR_GENERIC32:
19643 case PROCESSOR_GENERIC64:
19644 memory = get_attr_memory (insn);
19646 /* Show ability of reorder buffer to hide latency of load by executing
19647 in parallel with previous instruction in case
19648 previous instruction is not needed to compute the address. */
19649 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19650 && !ix86_agi_dependent (dep_insn, insn))
19652 enum attr_unit unit = get_attr_unit (insn);
19655 /* Because of the difference between the length of integer and
19656 floating unit pipeline preparation stages, the memory operands
19657 for floating point are cheaper.
19659 ??? For Athlon it the difference is most probably 2. */
19660 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19663 loadcost = TARGET_ATHLON ? 2 : 0;
19665 if (cost >= loadcost)
19678 /* How many alternative schedules to try. This should be as wide as the
19679 scheduling freedom in the DFA, but no wider. Making this value too
19680 large results extra work for the scheduler. */
19683 ia32_multipass_dfa_lookahead (void)
19687 case PROCESSOR_PENTIUM:
19690 case PROCESSOR_PENTIUMPRO:
19700 /* Compute the alignment given to a constant that is being placed in memory.
19701 EXP is the constant and ALIGN is the alignment that the object would
19703 The value of this function is used instead of that alignment to align
19707 ix86_constant_alignment (tree exp, int align)
19709 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19710 || TREE_CODE (exp) == INTEGER_CST)
19712 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19714 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19717 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19718 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19719 return BITS_PER_WORD;
19724 /* Compute the alignment for a static variable.
19725 TYPE is the data type, and ALIGN is the alignment that
19726 the object would ordinarily have. The value of this function is used
19727 instead of that alignment to align the object. */
19730 ix86_data_alignment (tree type, int align)
19732 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19734 if (AGGREGATE_TYPE_P (type)
19735 && TYPE_SIZE (type)
19736 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19737 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19738 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19739 && align < max_align)
19742 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19743 to 16byte boundary. */
19746 if (AGGREGATE_TYPE_P (type)
19747 && TYPE_SIZE (type)
19748 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19749 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19750 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19754 if (TREE_CODE (type) == ARRAY_TYPE)
19756 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19758 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19761 else if (TREE_CODE (type) == COMPLEX_TYPE)
19764 if (TYPE_MODE (type) == DCmode && align < 64)
19766 if ((TYPE_MODE (type) == XCmode
19767 || TYPE_MODE (type) == TCmode) && align < 128)
19770 else if ((TREE_CODE (type) == RECORD_TYPE
19771 || TREE_CODE (type) == UNION_TYPE
19772 || TREE_CODE (type) == QUAL_UNION_TYPE)
19773 && TYPE_FIELDS (type))
19775 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19777 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19780 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19781 || TREE_CODE (type) == INTEGER_TYPE)
19783 if (TYPE_MODE (type) == DFmode && align < 64)
19785 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19792 /* Compute the alignment for a local variable or a stack slot. EXP is
19793 the data type or decl itself, MODE is the widest mode available and
19794 ALIGN is the alignment that the object would ordinarily have. The
19795 value of this macro is used instead of that alignment to align the
19799 ix86_local_alignment (tree exp, enum machine_mode mode,
19800 unsigned int align)
19804 if (exp && DECL_P (exp))
19806 type = TREE_TYPE (exp);
19815 /* Don't do dynamic stack realignment for long long objects with
19816 -mpreferred-stack-boundary=2. */
19819 && ix86_preferred_stack_boundary < 64
19820 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19821 && (!type || !TYPE_USER_ALIGN (type))
19822 && (!decl || !DECL_USER_ALIGN (decl)))
19825 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19826 register in MODE. We will return the largest alignment of XF
19830 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19831 align = GET_MODE_ALIGNMENT (DFmode);
19835 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19836 to 16byte boundary. */
19839 if (AGGREGATE_TYPE_P (type)
19840 && TYPE_SIZE (type)
19841 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19842 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19843 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19846 if (TREE_CODE (type) == ARRAY_TYPE)
19848 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19850 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19853 else if (TREE_CODE (type) == COMPLEX_TYPE)
19855 if (TYPE_MODE (type) == DCmode && align < 64)
19857 if ((TYPE_MODE (type) == XCmode
19858 || TYPE_MODE (type) == TCmode) && align < 128)
19861 else if ((TREE_CODE (type) == RECORD_TYPE
19862 || TREE_CODE (type) == UNION_TYPE
19863 || TREE_CODE (type) == QUAL_UNION_TYPE)
19864 && TYPE_FIELDS (type))
19866 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19868 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19871 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19872 || TREE_CODE (type) == INTEGER_TYPE)
19875 if (TYPE_MODE (type) == DFmode && align < 64)
19877 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19883 /* Emit RTL insns to initialize the variable parts of a trampoline.
19884 FNADDR is an RTX for the address of the function's pure code.
19885 CXT is an RTX for the static chain value for the function. */
19887 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19891 /* Compute offset from the end of the jmp to the target function. */
19892 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19893 plus_constant (tramp, 10),
19894 NULL_RTX, 1, OPTAB_DIRECT);
19895 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19896 gen_int_mode (0xb9, QImode));
19897 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19898 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19899 gen_int_mode (0xe9, QImode));
19900 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19905 /* Try to load address using shorter movl instead of movabs.
19906 We may want to support movq for kernel mode, but kernel does not use
19907 trampolines at the moment. */
19908 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19910 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19911 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19912 gen_int_mode (0xbb41, HImode));
19913 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19914 gen_lowpart (SImode, fnaddr));
19919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19920 gen_int_mode (0xbb49, HImode));
19921 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19925 /* Load static chain using movabs to r10. */
19926 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19927 gen_int_mode (0xba49, HImode));
19928 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19931 /* Jump to the r11 */
19932 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19933 gen_int_mode (0xff49, HImode));
19934 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19935 gen_int_mode (0xe3, QImode));
19937 gcc_assert (offset <= TRAMPOLINE_SIZE);
19940 #ifdef ENABLE_EXECUTE_STACK
19941 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19942 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19946 /* Codes for all the SSE/MMX builtins. */
19949 IX86_BUILTIN_ADDPS,
19950 IX86_BUILTIN_ADDSS,
19951 IX86_BUILTIN_DIVPS,
19952 IX86_BUILTIN_DIVSS,
19953 IX86_BUILTIN_MULPS,
19954 IX86_BUILTIN_MULSS,
19955 IX86_BUILTIN_SUBPS,
19956 IX86_BUILTIN_SUBSS,
19958 IX86_BUILTIN_CMPEQPS,
19959 IX86_BUILTIN_CMPLTPS,
19960 IX86_BUILTIN_CMPLEPS,
19961 IX86_BUILTIN_CMPGTPS,
19962 IX86_BUILTIN_CMPGEPS,
19963 IX86_BUILTIN_CMPNEQPS,
19964 IX86_BUILTIN_CMPNLTPS,
19965 IX86_BUILTIN_CMPNLEPS,
19966 IX86_BUILTIN_CMPNGTPS,
19967 IX86_BUILTIN_CMPNGEPS,
19968 IX86_BUILTIN_CMPORDPS,
19969 IX86_BUILTIN_CMPUNORDPS,
19970 IX86_BUILTIN_CMPEQSS,
19971 IX86_BUILTIN_CMPLTSS,
19972 IX86_BUILTIN_CMPLESS,
19973 IX86_BUILTIN_CMPNEQSS,
19974 IX86_BUILTIN_CMPNLTSS,
19975 IX86_BUILTIN_CMPNLESS,
19976 IX86_BUILTIN_CMPNGTSS,
19977 IX86_BUILTIN_CMPNGESS,
19978 IX86_BUILTIN_CMPORDSS,
19979 IX86_BUILTIN_CMPUNORDSS,
19981 IX86_BUILTIN_COMIEQSS,
19982 IX86_BUILTIN_COMILTSS,
19983 IX86_BUILTIN_COMILESS,
19984 IX86_BUILTIN_COMIGTSS,
19985 IX86_BUILTIN_COMIGESS,
19986 IX86_BUILTIN_COMINEQSS,
19987 IX86_BUILTIN_UCOMIEQSS,
19988 IX86_BUILTIN_UCOMILTSS,
19989 IX86_BUILTIN_UCOMILESS,
19990 IX86_BUILTIN_UCOMIGTSS,
19991 IX86_BUILTIN_UCOMIGESS,
19992 IX86_BUILTIN_UCOMINEQSS,
19994 IX86_BUILTIN_CVTPI2PS,
19995 IX86_BUILTIN_CVTPS2PI,
19996 IX86_BUILTIN_CVTSI2SS,
19997 IX86_BUILTIN_CVTSI642SS,
19998 IX86_BUILTIN_CVTSS2SI,
19999 IX86_BUILTIN_CVTSS2SI64,
20000 IX86_BUILTIN_CVTTPS2PI,
20001 IX86_BUILTIN_CVTTSS2SI,
20002 IX86_BUILTIN_CVTTSS2SI64,
20004 IX86_BUILTIN_MAXPS,
20005 IX86_BUILTIN_MAXSS,
20006 IX86_BUILTIN_MINPS,
20007 IX86_BUILTIN_MINSS,
20009 IX86_BUILTIN_LOADUPS,
20010 IX86_BUILTIN_STOREUPS,
20011 IX86_BUILTIN_MOVSS,
20013 IX86_BUILTIN_MOVHLPS,
20014 IX86_BUILTIN_MOVLHPS,
20015 IX86_BUILTIN_LOADHPS,
20016 IX86_BUILTIN_LOADLPS,
20017 IX86_BUILTIN_STOREHPS,
20018 IX86_BUILTIN_STORELPS,
20020 IX86_BUILTIN_MASKMOVQ,
20021 IX86_BUILTIN_MOVMSKPS,
20022 IX86_BUILTIN_PMOVMSKB,
20024 IX86_BUILTIN_MOVNTPS,
20025 IX86_BUILTIN_MOVNTQ,
20027 IX86_BUILTIN_LOADDQU,
20028 IX86_BUILTIN_STOREDQU,
20030 IX86_BUILTIN_PACKSSWB,
20031 IX86_BUILTIN_PACKSSDW,
20032 IX86_BUILTIN_PACKUSWB,
20034 IX86_BUILTIN_PADDB,
20035 IX86_BUILTIN_PADDW,
20036 IX86_BUILTIN_PADDD,
20037 IX86_BUILTIN_PADDQ,
20038 IX86_BUILTIN_PADDSB,
20039 IX86_BUILTIN_PADDSW,
20040 IX86_BUILTIN_PADDUSB,
20041 IX86_BUILTIN_PADDUSW,
20042 IX86_BUILTIN_PSUBB,
20043 IX86_BUILTIN_PSUBW,
20044 IX86_BUILTIN_PSUBD,
20045 IX86_BUILTIN_PSUBQ,
20046 IX86_BUILTIN_PSUBSB,
20047 IX86_BUILTIN_PSUBSW,
20048 IX86_BUILTIN_PSUBUSB,
20049 IX86_BUILTIN_PSUBUSW,
20052 IX86_BUILTIN_PANDN,
20056 IX86_BUILTIN_PAVGB,
20057 IX86_BUILTIN_PAVGW,
20059 IX86_BUILTIN_PCMPEQB,
20060 IX86_BUILTIN_PCMPEQW,
20061 IX86_BUILTIN_PCMPEQD,
20062 IX86_BUILTIN_PCMPGTB,
20063 IX86_BUILTIN_PCMPGTW,
20064 IX86_BUILTIN_PCMPGTD,
20066 IX86_BUILTIN_PMADDWD,
20068 IX86_BUILTIN_PMAXSW,
20069 IX86_BUILTIN_PMAXUB,
20070 IX86_BUILTIN_PMINSW,
20071 IX86_BUILTIN_PMINUB,
20073 IX86_BUILTIN_PMULHUW,
20074 IX86_BUILTIN_PMULHW,
20075 IX86_BUILTIN_PMULLW,
20077 IX86_BUILTIN_PSADBW,
20078 IX86_BUILTIN_PSHUFW,
20080 IX86_BUILTIN_PSLLW,
20081 IX86_BUILTIN_PSLLD,
20082 IX86_BUILTIN_PSLLQ,
20083 IX86_BUILTIN_PSRAW,
20084 IX86_BUILTIN_PSRAD,
20085 IX86_BUILTIN_PSRLW,
20086 IX86_BUILTIN_PSRLD,
20087 IX86_BUILTIN_PSRLQ,
20088 IX86_BUILTIN_PSLLWI,
20089 IX86_BUILTIN_PSLLDI,
20090 IX86_BUILTIN_PSLLQI,
20091 IX86_BUILTIN_PSRAWI,
20092 IX86_BUILTIN_PSRADI,
20093 IX86_BUILTIN_PSRLWI,
20094 IX86_BUILTIN_PSRLDI,
20095 IX86_BUILTIN_PSRLQI,
20097 IX86_BUILTIN_PUNPCKHBW,
20098 IX86_BUILTIN_PUNPCKHWD,
20099 IX86_BUILTIN_PUNPCKHDQ,
20100 IX86_BUILTIN_PUNPCKLBW,
20101 IX86_BUILTIN_PUNPCKLWD,
20102 IX86_BUILTIN_PUNPCKLDQ,
20104 IX86_BUILTIN_SHUFPS,
20106 IX86_BUILTIN_RCPPS,
20107 IX86_BUILTIN_RCPSS,
20108 IX86_BUILTIN_RSQRTPS,
20109 IX86_BUILTIN_RSQRTPS_NR,
20110 IX86_BUILTIN_RSQRTSS,
20111 IX86_BUILTIN_RSQRTF,
20112 IX86_BUILTIN_SQRTPS,
20113 IX86_BUILTIN_SQRTPS_NR,
20114 IX86_BUILTIN_SQRTSS,
20116 IX86_BUILTIN_UNPCKHPS,
20117 IX86_BUILTIN_UNPCKLPS,
20119 IX86_BUILTIN_ANDPS,
20120 IX86_BUILTIN_ANDNPS,
20122 IX86_BUILTIN_XORPS,
20125 IX86_BUILTIN_LDMXCSR,
20126 IX86_BUILTIN_STMXCSR,
20127 IX86_BUILTIN_SFENCE,
20129 /* 3DNow! Original */
20130 IX86_BUILTIN_FEMMS,
20131 IX86_BUILTIN_PAVGUSB,
20132 IX86_BUILTIN_PF2ID,
20133 IX86_BUILTIN_PFACC,
20134 IX86_BUILTIN_PFADD,
20135 IX86_BUILTIN_PFCMPEQ,
20136 IX86_BUILTIN_PFCMPGE,
20137 IX86_BUILTIN_PFCMPGT,
20138 IX86_BUILTIN_PFMAX,
20139 IX86_BUILTIN_PFMIN,
20140 IX86_BUILTIN_PFMUL,
20141 IX86_BUILTIN_PFRCP,
20142 IX86_BUILTIN_PFRCPIT1,
20143 IX86_BUILTIN_PFRCPIT2,
20144 IX86_BUILTIN_PFRSQIT1,
20145 IX86_BUILTIN_PFRSQRT,
20146 IX86_BUILTIN_PFSUB,
20147 IX86_BUILTIN_PFSUBR,
20148 IX86_BUILTIN_PI2FD,
20149 IX86_BUILTIN_PMULHRW,
20151 /* 3DNow! Athlon Extensions */
20152 IX86_BUILTIN_PF2IW,
20153 IX86_BUILTIN_PFNACC,
20154 IX86_BUILTIN_PFPNACC,
20155 IX86_BUILTIN_PI2FW,
20156 IX86_BUILTIN_PSWAPDSI,
20157 IX86_BUILTIN_PSWAPDSF,
20160 IX86_BUILTIN_ADDPD,
20161 IX86_BUILTIN_ADDSD,
20162 IX86_BUILTIN_DIVPD,
20163 IX86_BUILTIN_DIVSD,
20164 IX86_BUILTIN_MULPD,
20165 IX86_BUILTIN_MULSD,
20166 IX86_BUILTIN_SUBPD,
20167 IX86_BUILTIN_SUBSD,
20169 IX86_BUILTIN_CMPEQPD,
20170 IX86_BUILTIN_CMPLTPD,
20171 IX86_BUILTIN_CMPLEPD,
20172 IX86_BUILTIN_CMPGTPD,
20173 IX86_BUILTIN_CMPGEPD,
20174 IX86_BUILTIN_CMPNEQPD,
20175 IX86_BUILTIN_CMPNLTPD,
20176 IX86_BUILTIN_CMPNLEPD,
20177 IX86_BUILTIN_CMPNGTPD,
20178 IX86_BUILTIN_CMPNGEPD,
20179 IX86_BUILTIN_CMPORDPD,
20180 IX86_BUILTIN_CMPUNORDPD,
20181 IX86_BUILTIN_CMPEQSD,
20182 IX86_BUILTIN_CMPLTSD,
20183 IX86_BUILTIN_CMPLESD,
20184 IX86_BUILTIN_CMPNEQSD,
20185 IX86_BUILTIN_CMPNLTSD,
20186 IX86_BUILTIN_CMPNLESD,
20187 IX86_BUILTIN_CMPORDSD,
20188 IX86_BUILTIN_CMPUNORDSD,
20190 IX86_BUILTIN_COMIEQSD,
20191 IX86_BUILTIN_COMILTSD,
20192 IX86_BUILTIN_COMILESD,
20193 IX86_BUILTIN_COMIGTSD,
20194 IX86_BUILTIN_COMIGESD,
20195 IX86_BUILTIN_COMINEQSD,
20196 IX86_BUILTIN_UCOMIEQSD,
20197 IX86_BUILTIN_UCOMILTSD,
20198 IX86_BUILTIN_UCOMILESD,
20199 IX86_BUILTIN_UCOMIGTSD,
20200 IX86_BUILTIN_UCOMIGESD,
20201 IX86_BUILTIN_UCOMINEQSD,
20203 IX86_BUILTIN_MAXPD,
20204 IX86_BUILTIN_MAXSD,
20205 IX86_BUILTIN_MINPD,
20206 IX86_BUILTIN_MINSD,
20208 IX86_BUILTIN_ANDPD,
20209 IX86_BUILTIN_ANDNPD,
20211 IX86_BUILTIN_XORPD,
20213 IX86_BUILTIN_SQRTPD,
20214 IX86_BUILTIN_SQRTSD,
20216 IX86_BUILTIN_UNPCKHPD,
20217 IX86_BUILTIN_UNPCKLPD,
20219 IX86_BUILTIN_SHUFPD,
20221 IX86_BUILTIN_LOADUPD,
20222 IX86_BUILTIN_STOREUPD,
20223 IX86_BUILTIN_MOVSD,
20225 IX86_BUILTIN_LOADHPD,
20226 IX86_BUILTIN_LOADLPD,
20228 IX86_BUILTIN_CVTDQ2PD,
20229 IX86_BUILTIN_CVTDQ2PS,
20231 IX86_BUILTIN_CVTPD2DQ,
20232 IX86_BUILTIN_CVTPD2PI,
20233 IX86_BUILTIN_CVTPD2PS,
20234 IX86_BUILTIN_CVTTPD2DQ,
20235 IX86_BUILTIN_CVTTPD2PI,
20237 IX86_BUILTIN_CVTPI2PD,
20238 IX86_BUILTIN_CVTSI2SD,
20239 IX86_BUILTIN_CVTSI642SD,
20241 IX86_BUILTIN_CVTSD2SI,
20242 IX86_BUILTIN_CVTSD2SI64,
20243 IX86_BUILTIN_CVTSD2SS,
20244 IX86_BUILTIN_CVTSS2SD,
20245 IX86_BUILTIN_CVTTSD2SI,
20246 IX86_BUILTIN_CVTTSD2SI64,
20248 IX86_BUILTIN_CVTPS2DQ,
20249 IX86_BUILTIN_CVTPS2PD,
20250 IX86_BUILTIN_CVTTPS2DQ,
20252 IX86_BUILTIN_MOVNTI,
20253 IX86_BUILTIN_MOVNTPD,
20254 IX86_BUILTIN_MOVNTDQ,
20256 IX86_BUILTIN_MOVQ128,
20259 IX86_BUILTIN_MASKMOVDQU,
20260 IX86_BUILTIN_MOVMSKPD,
20261 IX86_BUILTIN_PMOVMSKB128,
20263 IX86_BUILTIN_PACKSSWB128,
20264 IX86_BUILTIN_PACKSSDW128,
20265 IX86_BUILTIN_PACKUSWB128,
20267 IX86_BUILTIN_PADDB128,
20268 IX86_BUILTIN_PADDW128,
20269 IX86_BUILTIN_PADDD128,
20270 IX86_BUILTIN_PADDQ128,
20271 IX86_BUILTIN_PADDSB128,
20272 IX86_BUILTIN_PADDSW128,
20273 IX86_BUILTIN_PADDUSB128,
20274 IX86_BUILTIN_PADDUSW128,
20275 IX86_BUILTIN_PSUBB128,
20276 IX86_BUILTIN_PSUBW128,
20277 IX86_BUILTIN_PSUBD128,
20278 IX86_BUILTIN_PSUBQ128,
20279 IX86_BUILTIN_PSUBSB128,
20280 IX86_BUILTIN_PSUBSW128,
20281 IX86_BUILTIN_PSUBUSB128,
20282 IX86_BUILTIN_PSUBUSW128,
20284 IX86_BUILTIN_PAND128,
20285 IX86_BUILTIN_PANDN128,
20286 IX86_BUILTIN_POR128,
20287 IX86_BUILTIN_PXOR128,
20289 IX86_BUILTIN_PAVGB128,
20290 IX86_BUILTIN_PAVGW128,
20292 IX86_BUILTIN_PCMPEQB128,
20293 IX86_BUILTIN_PCMPEQW128,
20294 IX86_BUILTIN_PCMPEQD128,
20295 IX86_BUILTIN_PCMPGTB128,
20296 IX86_BUILTIN_PCMPGTW128,
20297 IX86_BUILTIN_PCMPGTD128,
20299 IX86_BUILTIN_PMADDWD128,
20301 IX86_BUILTIN_PMAXSW128,
20302 IX86_BUILTIN_PMAXUB128,
20303 IX86_BUILTIN_PMINSW128,
20304 IX86_BUILTIN_PMINUB128,
20306 IX86_BUILTIN_PMULUDQ,
20307 IX86_BUILTIN_PMULUDQ128,
20308 IX86_BUILTIN_PMULHUW128,
20309 IX86_BUILTIN_PMULHW128,
20310 IX86_BUILTIN_PMULLW128,
20312 IX86_BUILTIN_PSADBW128,
20313 IX86_BUILTIN_PSHUFHW,
20314 IX86_BUILTIN_PSHUFLW,
20315 IX86_BUILTIN_PSHUFD,
20317 IX86_BUILTIN_PSLLDQI128,
20318 IX86_BUILTIN_PSLLWI128,
20319 IX86_BUILTIN_PSLLDI128,
20320 IX86_BUILTIN_PSLLQI128,
20321 IX86_BUILTIN_PSRAWI128,
20322 IX86_BUILTIN_PSRADI128,
20323 IX86_BUILTIN_PSRLDQI128,
20324 IX86_BUILTIN_PSRLWI128,
20325 IX86_BUILTIN_PSRLDI128,
20326 IX86_BUILTIN_PSRLQI128,
20328 IX86_BUILTIN_PSLLDQ128,
20329 IX86_BUILTIN_PSLLW128,
20330 IX86_BUILTIN_PSLLD128,
20331 IX86_BUILTIN_PSLLQ128,
20332 IX86_BUILTIN_PSRAW128,
20333 IX86_BUILTIN_PSRAD128,
20334 IX86_BUILTIN_PSRLW128,
20335 IX86_BUILTIN_PSRLD128,
20336 IX86_BUILTIN_PSRLQ128,
20338 IX86_BUILTIN_PUNPCKHBW128,
20339 IX86_BUILTIN_PUNPCKHWD128,
20340 IX86_BUILTIN_PUNPCKHDQ128,
20341 IX86_BUILTIN_PUNPCKHQDQ128,
20342 IX86_BUILTIN_PUNPCKLBW128,
20343 IX86_BUILTIN_PUNPCKLWD128,
20344 IX86_BUILTIN_PUNPCKLDQ128,
20345 IX86_BUILTIN_PUNPCKLQDQ128,
20347 IX86_BUILTIN_CLFLUSH,
20348 IX86_BUILTIN_MFENCE,
20349 IX86_BUILTIN_LFENCE,
20352 IX86_BUILTIN_ADDSUBPS,
20353 IX86_BUILTIN_HADDPS,
20354 IX86_BUILTIN_HSUBPS,
20355 IX86_BUILTIN_MOVSHDUP,
20356 IX86_BUILTIN_MOVSLDUP,
20357 IX86_BUILTIN_ADDSUBPD,
20358 IX86_BUILTIN_HADDPD,
20359 IX86_BUILTIN_HSUBPD,
20360 IX86_BUILTIN_LDDQU,
20362 IX86_BUILTIN_MONITOR,
20363 IX86_BUILTIN_MWAIT,
20366 IX86_BUILTIN_PHADDW,
20367 IX86_BUILTIN_PHADDD,
20368 IX86_BUILTIN_PHADDSW,
20369 IX86_BUILTIN_PHSUBW,
20370 IX86_BUILTIN_PHSUBD,
20371 IX86_BUILTIN_PHSUBSW,
20372 IX86_BUILTIN_PMADDUBSW,
20373 IX86_BUILTIN_PMULHRSW,
20374 IX86_BUILTIN_PSHUFB,
20375 IX86_BUILTIN_PSIGNB,
20376 IX86_BUILTIN_PSIGNW,
20377 IX86_BUILTIN_PSIGND,
20378 IX86_BUILTIN_PALIGNR,
20379 IX86_BUILTIN_PABSB,
20380 IX86_BUILTIN_PABSW,
20381 IX86_BUILTIN_PABSD,
20383 IX86_BUILTIN_PHADDW128,
20384 IX86_BUILTIN_PHADDD128,
20385 IX86_BUILTIN_PHADDSW128,
20386 IX86_BUILTIN_PHSUBW128,
20387 IX86_BUILTIN_PHSUBD128,
20388 IX86_BUILTIN_PHSUBSW128,
20389 IX86_BUILTIN_PMADDUBSW128,
20390 IX86_BUILTIN_PMULHRSW128,
20391 IX86_BUILTIN_PSHUFB128,
20392 IX86_BUILTIN_PSIGNB128,
20393 IX86_BUILTIN_PSIGNW128,
20394 IX86_BUILTIN_PSIGND128,
20395 IX86_BUILTIN_PALIGNR128,
20396 IX86_BUILTIN_PABSB128,
20397 IX86_BUILTIN_PABSW128,
20398 IX86_BUILTIN_PABSD128,
20400 /* AMDFAM10 - SSE4A New Instructions. */
20401 IX86_BUILTIN_MOVNTSD,
20402 IX86_BUILTIN_MOVNTSS,
20403 IX86_BUILTIN_EXTRQI,
20404 IX86_BUILTIN_EXTRQ,
20405 IX86_BUILTIN_INSERTQI,
20406 IX86_BUILTIN_INSERTQ,
20409 IX86_BUILTIN_BLENDPD,
20410 IX86_BUILTIN_BLENDPS,
20411 IX86_BUILTIN_BLENDVPD,
20412 IX86_BUILTIN_BLENDVPS,
20413 IX86_BUILTIN_PBLENDVB128,
20414 IX86_BUILTIN_PBLENDW128,
20419 IX86_BUILTIN_INSERTPS128,
20421 IX86_BUILTIN_MOVNTDQA,
20422 IX86_BUILTIN_MPSADBW128,
20423 IX86_BUILTIN_PACKUSDW128,
20424 IX86_BUILTIN_PCMPEQQ,
20425 IX86_BUILTIN_PHMINPOSUW128,
20427 IX86_BUILTIN_PMAXSB128,
20428 IX86_BUILTIN_PMAXSD128,
20429 IX86_BUILTIN_PMAXUD128,
20430 IX86_BUILTIN_PMAXUW128,
20432 IX86_BUILTIN_PMINSB128,
20433 IX86_BUILTIN_PMINSD128,
20434 IX86_BUILTIN_PMINUD128,
20435 IX86_BUILTIN_PMINUW128,
20437 IX86_BUILTIN_PMOVSXBW128,
20438 IX86_BUILTIN_PMOVSXBD128,
20439 IX86_BUILTIN_PMOVSXBQ128,
20440 IX86_BUILTIN_PMOVSXWD128,
20441 IX86_BUILTIN_PMOVSXWQ128,
20442 IX86_BUILTIN_PMOVSXDQ128,
20444 IX86_BUILTIN_PMOVZXBW128,
20445 IX86_BUILTIN_PMOVZXBD128,
20446 IX86_BUILTIN_PMOVZXBQ128,
20447 IX86_BUILTIN_PMOVZXWD128,
20448 IX86_BUILTIN_PMOVZXWQ128,
20449 IX86_BUILTIN_PMOVZXDQ128,
20451 IX86_BUILTIN_PMULDQ128,
20452 IX86_BUILTIN_PMULLD128,
20454 IX86_BUILTIN_ROUNDPD,
20455 IX86_BUILTIN_ROUNDPS,
20456 IX86_BUILTIN_ROUNDSD,
20457 IX86_BUILTIN_ROUNDSS,
20459 IX86_BUILTIN_PTESTZ,
20460 IX86_BUILTIN_PTESTC,
20461 IX86_BUILTIN_PTESTNZC,
20463 IX86_BUILTIN_VEC_INIT_V2SI,
20464 IX86_BUILTIN_VEC_INIT_V4HI,
20465 IX86_BUILTIN_VEC_INIT_V8QI,
20466 IX86_BUILTIN_VEC_EXT_V2DF,
20467 IX86_BUILTIN_VEC_EXT_V2DI,
20468 IX86_BUILTIN_VEC_EXT_V4SF,
20469 IX86_BUILTIN_VEC_EXT_V4SI,
20470 IX86_BUILTIN_VEC_EXT_V8HI,
20471 IX86_BUILTIN_VEC_EXT_V2SI,
20472 IX86_BUILTIN_VEC_EXT_V4HI,
20473 IX86_BUILTIN_VEC_EXT_V16QI,
20474 IX86_BUILTIN_VEC_SET_V2DI,
20475 IX86_BUILTIN_VEC_SET_V4SF,
20476 IX86_BUILTIN_VEC_SET_V4SI,
20477 IX86_BUILTIN_VEC_SET_V8HI,
20478 IX86_BUILTIN_VEC_SET_V4HI,
20479 IX86_BUILTIN_VEC_SET_V16QI,
20481 IX86_BUILTIN_VEC_PACK_SFIX,
20484 IX86_BUILTIN_CRC32QI,
20485 IX86_BUILTIN_CRC32HI,
20486 IX86_BUILTIN_CRC32SI,
20487 IX86_BUILTIN_CRC32DI,
20489 IX86_BUILTIN_PCMPESTRI128,
20490 IX86_BUILTIN_PCMPESTRM128,
20491 IX86_BUILTIN_PCMPESTRA128,
20492 IX86_BUILTIN_PCMPESTRC128,
20493 IX86_BUILTIN_PCMPESTRO128,
20494 IX86_BUILTIN_PCMPESTRS128,
20495 IX86_BUILTIN_PCMPESTRZ128,
20496 IX86_BUILTIN_PCMPISTRI128,
20497 IX86_BUILTIN_PCMPISTRM128,
20498 IX86_BUILTIN_PCMPISTRA128,
20499 IX86_BUILTIN_PCMPISTRC128,
20500 IX86_BUILTIN_PCMPISTRO128,
20501 IX86_BUILTIN_PCMPISTRS128,
20502 IX86_BUILTIN_PCMPISTRZ128,
20504 IX86_BUILTIN_PCMPGTQ,
20506 /* AES instructions */
20507 IX86_BUILTIN_AESENC128,
20508 IX86_BUILTIN_AESENCLAST128,
20509 IX86_BUILTIN_AESDEC128,
20510 IX86_BUILTIN_AESDECLAST128,
20511 IX86_BUILTIN_AESIMC128,
20512 IX86_BUILTIN_AESKEYGENASSIST128,
20514 /* PCLMUL instruction */
20515 IX86_BUILTIN_PCLMULQDQ128,
20518 IX86_BUILTIN_ADDPD256,
20519 IX86_BUILTIN_ADDPS256,
20520 IX86_BUILTIN_ADDSUBPD256,
20521 IX86_BUILTIN_ADDSUBPS256,
20522 IX86_BUILTIN_ANDPD256,
20523 IX86_BUILTIN_ANDPS256,
20524 IX86_BUILTIN_ANDNPD256,
20525 IX86_BUILTIN_ANDNPS256,
20526 IX86_BUILTIN_BLENDPD256,
20527 IX86_BUILTIN_BLENDPS256,
20528 IX86_BUILTIN_BLENDVPD256,
20529 IX86_BUILTIN_BLENDVPS256,
20530 IX86_BUILTIN_DIVPD256,
20531 IX86_BUILTIN_DIVPS256,
20532 IX86_BUILTIN_DPPS256,
20533 IX86_BUILTIN_HADDPD256,
20534 IX86_BUILTIN_HADDPS256,
20535 IX86_BUILTIN_HSUBPD256,
20536 IX86_BUILTIN_HSUBPS256,
20537 IX86_BUILTIN_MAXPD256,
20538 IX86_BUILTIN_MAXPS256,
20539 IX86_BUILTIN_MINPD256,
20540 IX86_BUILTIN_MINPS256,
20541 IX86_BUILTIN_MULPD256,
20542 IX86_BUILTIN_MULPS256,
20543 IX86_BUILTIN_ORPD256,
20544 IX86_BUILTIN_ORPS256,
20545 IX86_BUILTIN_SHUFPD256,
20546 IX86_BUILTIN_SHUFPS256,
20547 IX86_BUILTIN_SUBPD256,
20548 IX86_BUILTIN_SUBPS256,
20549 IX86_BUILTIN_XORPD256,
20550 IX86_BUILTIN_XORPS256,
20551 IX86_BUILTIN_CMPSD,
20552 IX86_BUILTIN_CMPSS,
20553 IX86_BUILTIN_CMPPD,
20554 IX86_BUILTIN_CMPPS,
20555 IX86_BUILTIN_CMPPD256,
20556 IX86_BUILTIN_CMPPS256,
20557 IX86_BUILTIN_CVTDQ2PD256,
20558 IX86_BUILTIN_CVTDQ2PS256,
20559 IX86_BUILTIN_CVTPD2PS256,
20560 IX86_BUILTIN_CVTPS2DQ256,
20561 IX86_BUILTIN_CVTPS2PD256,
20562 IX86_BUILTIN_CVTTPD2DQ256,
20563 IX86_BUILTIN_CVTPD2DQ256,
20564 IX86_BUILTIN_CVTTPS2DQ256,
20565 IX86_BUILTIN_EXTRACTF128PD256,
20566 IX86_BUILTIN_EXTRACTF128PS256,
20567 IX86_BUILTIN_EXTRACTF128SI256,
20568 IX86_BUILTIN_VZEROALL,
20569 IX86_BUILTIN_VZEROUPPER,
20570 IX86_BUILTIN_VZEROUPPER_REX64,
20571 IX86_BUILTIN_VPERMILVARPD,
20572 IX86_BUILTIN_VPERMILVARPS,
20573 IX86_BUILTIN_VPERMILVARPD256,
20574 IX86_BUILTIN_VPERMILVARPS256,
20575 IX86_BUILTIN_VPERMILPD,
20576 IX86_BUILTIN_VPERMILPS,
20577 IX86_BUILTIN_VPERMILPD256,
20578 IX86_BUILTIN_VPERMILPS256,
20579 IX86_BUILTIN_VPERM2F128PD256,
20580 IX86_BUILTIN_VPERM2F128PS256,
20581 IX86_BUILTIN_VPERM2F128SI256,
20582 IX86_BUILTIN_VBROADCASTSS,
20583 IX86_BUILTIN_VBROADCASTSD256,
20584 IX86_BUILTIN_VBROADCASTSS256,
20585 IX86_BUILTIN_VBROADCASTPD256,
20586 IX86_BUILTIN_VBROADCASTPS256,
20587 IX86_BUILTIN_VINSERTF128PD256,
20588 IX86_BUILTIN_VINSERTF128PS256,
20589 IX86_BUILTIN_VINSERTF128SI256,
20590 IX86_BUILTIN_LOADUPD256,
20591 IX86_BUILTIN_LOADUPS256,
20592 IX86_BUILTIN_STOREUPD256,
20593 IX86_BUILTIN_STOREUPS256,
20594 IX86_BUILTIN_LDDQU256,
20595 IX86_BUILTIN_MOVNTDQ256,
20596 IX86_BUILTIN_MOVNTPD256,
20597 IX86_BUILTIN_MOVNTPS256,
20598 IX86_BUILTIN_LOADDQU256,
20599 IX86_BUILTIN_STOREDQU256,
20600 IX86_BUILTIN_MASKLOADPD,
20601 IX86_BUILTIN_MASKLOADPS,
20602 IX86_BUILTIN_MASKSTOREPD,
20603 IX86_BUILTIN_MASKSTOREPS,
20604 IX86_BUILTIN_MASKLOADPD256,
20605 IX86_BUILTIN_MASKLOADPS256,
20606 IX86_BUILTIN_MASKSTOREPD256,
20607 IX86_BUILTIN_MASKSTOREPS256,
20608 IX86_BUILTIN_MOVSHDUP256,
20609 IX86_BUILTIN_MOVSLDUP256,
20610 IX86_BUILTIN_MOVDDUP256,
20612 IX86_BUILTIN_SQRTPD256,
20613 IX86_BUILTIN_SQRTPS256,
20614 IX86_BUILTIN_SQRTPS_NR256,
20615 IX86_BUILTIN_RSQRTPS256,
20616 IX86_BUILTIN_RSQRTPS_NR256,
20618 IX86_BUILTIN_RCPPS256,
20620 IX86_BUILTIN_ROUNDPD256,
20621 IX86_BUILTIN_ROUNDPS256,
20623 IX86_BUILTIN_UNPCKHPD256,
20624 IX86_BUILTIN_UNPCKLPD256,
20625 IX86_BUILTIN_UNPCKHPS256,
20626 IX86_BUILTIN_UNPCKLPS256,
20628 IX86_BUILTIN_SI256_SI,
20629 IX86_BUILTIN_PS256_PS,
20630 IX86_BUILTIN_PD256_PD,
20631 IX86_BUILTIN_SI_SI256,
20632 IX86_BUILTIN_PS_PS256,
20633 IX86_BUILTIN_PD_PD256,
20635 IX86_BUILTIN_VTESTZPD,
20636 IX86_BUILTIN_VTESTCPD,
20637 IX86_BUILTIN_VTESTNZCPD,
20638 IX86_BUILTIN_VTESTZPS,
20639 IX86_BUILTIN_VTESTCPS,
20640 IX86_BUILTIN_VTESTNZCPS,
20641 IX86_BUILTIN_VTESTZPD256,
20642 IX86_BUILTIN_VTESTCPD256,
20643 IX86_BUILTIN_VTESTNZCPD256,
20644 IX86_BUILTIN_VTESTZPS256,
20645 IX86_BUILTIN_VTESTCPS256,
20646 IX86_BUILTIN_VTESTNZCPS256,
20647 IX86_BUILTIN_PTESTZ256,
20648 IX86_BUILTIN_PTESTC256,
20649 IX86_BUILTIN_PTESTNZC256,
20651 IX86_BUILTIN_MOVMSKPD256,
20652 IX86_BUILTIN_MOVMSKPS256,
20654 /* TFmode support builtins. */
20656 IX86_BUILTIN_HUGE_VALQ,
20657 IX86_BUILTIN_FABSQ,
20658 IX86_BUILTIN_COPYSIGNQ,
20660 /* SSE5 instructions */
20661 IX86_BUILTIN_FMADDSS,
20662 IX86_BUILTIN_FMADDSD,
20663 IX86_BUILTIN_FMADDPS,
20664 IX86_BUILTIN_FMADDPD,
20665 IX86_BUILTIN_FMSUBSS,
20666 IX86_BUILTIN_FMSUBSD,
20667 IX86_BUILTIN_FMSUBPS,
20668 IX86_BUILTIN_FMSUBPD,
20669 IX86_BUILTIN_FNMADDSS,
20670 IX86_BUILTIN_FNMADDSD,
20671 IX86_BUILTIN_FNMADDPS,
20672 IX86_BUILTIN_FNMADDPD,
20673 IX86_BUILTIN_FNMSUBSS,
20674 IX86_BUILTIN_FNMSUBSD,
20675 IX86_BUILTIN_FNMSUBPS,
20676 IX86_BUILTIN_FNMSUBPD,
20677 IX86_BUILTIN_PCMOV,
20678 IX86_BUILTIN_PCMOV_V2DI,
20679 IX86_BUILTIN_PCMOV_V4SI,
20680 IX86_BUILTIN_PCMOV_V8HI,
20681 IX86_BUILTIN_PCMOV_V16QI,
20682 IX86_BUILTIN_PCMOV_V4SF,
20683 IX86_BUILTIN_PCMOV_V2DF,
20684 IX86_BUILTIN_PPERM,
20685 IX86_BUILTIN_PERMPS,
20686 IX86_BUILTIN_PERMPD,
20687 IX86_BUILTIN_PMACSSWW,
20688 IX86_BUILTIN_PMACSWW,
20689 IX86_BUILTIN_PMACSSWD,
20690 IX86_BUILTIN_PMACSWD,
20691 IX86_BUILTIN_PMACSSDD,
20692 IX86_BUILTIN_PMACSDD,
20693 IX86_BUILTIN_PMACSSDQL,
20694 IX86_BUILTIN_PMACSSDQH,
20695 IX86_BUILTIN_PMACSDQL,
20696 IX86_BUILTIN_PMACSDQH,
20697 IX86_BUILTIN_PMADCSSWD,
20698 IX86_BUILTIN_PMADCSWD,
20699 IX86_BUILTIN_PHADDBW,
20700 IX86_BUILTIN_PHADDBD,
20701 IX86_BUILTIN_PHADDBQ,
20702 IX86_BUILTIN_PHADDWD,
20703 IX86_BUILTIN_PHADDWQ,
20704 IX86_BUILTIN_PHADDDQ,
20705 IX86_BUILTIN_PHADDUBW,
20706 IX86_BUILTIN_PHADDUBD,
20707 IX86_BUILTIN_PHADDUBQ,
20708 IX86_BUILTIN_PHADDUWD,
20709 IX86_BUILTIN_PHADDUWQ,
20710 IX86_BUILTIN_PHADDUDQ,
20711 IX86_BUILTIN_PHSUBBW,
20712 IX86_BUILTIN_PHSUBWD,
20713 IX86_BUILTIN_PHSUBDQ,
20714 IX86_BUILTIN_PROTB,
20715 IX86_BUILTIN_PROTW,
20716 IX86_BUILTIN_PROTD,
20717 IX86_BUILTIN_PROTQ,
20718 IX86_BUILTIN_PROTB_IMM,
20719 IX86_BUILTIN_PROTW_IMM,
20720 IX86_BUILTIN_PROTD_IMM,
20721 IX86_BUILTIN_PROTQ_IMM,
20722 IX86_BUILTIN_PSHLB,
20723 IX86_BUILTIN_PSHLW,
20724 IX86_BUILTIN_PSHLD,
20725 IX86_BUILTIN_PSHLQ,
20726 IX86_BUILTIN_PSHAB,
20727 IX86_BUILTIN_PSHAW,
20728 IX86_BUILTIN_PSHAD,
20729 IX86_BUILTIN_PSHAQ,
20730 IX86_BUILTIN_FRCZSS,
20731 IX86_BUILTIN_FRCZSD,
20732 IX86_BUILTIN_FRCZPS,
20733 IX86_BUILTIN_FRCZPD,
20734 IX86_BUILTIN_CVTPH2PS,
20735 IX86_BUILTIN_CVTPS2PH,
20737 IX86_BUILTIN_COMEQSS,
20738 IX86_BUILTIN_COMNESS,
20739 IX86_BUILTIN_COMLTSS,
20740 IX86_BUILTIN_COMLESS,
20741 IX86_BUILTIN_COMGTSS,
20742 IX86_BUILTIN_COMGESS,
20743 IX86_BUILTIN_COMUEQSS,
20744 IX86_BUILTIN_COMUNESS,
20745 IX86_BUILTIN_COMULTSS,
20746 IX86_BUILTIN_COMULESS,
20747 IX86_BUILTIN_COMUGTSS,
20748 IX86_BUILTIN_COMUGESS,
20749 IX86_BUILTIN_COMORDSS,
20750 IX86_BUILTIN_COMUNORDSS,
20751 IX86_BUILTIN_COMFALSESS,
20752 IX86_BUILTIN_COMTRUESS,
20754 IX86_BUILTIN_COMEQSD,
20755 IX86_BUILTIN_COMNESD,
20756 IX86_BUILTIN_COMLTSD,
20757 IX86_BUILTIN_COMLESD,
20758 IX86_BUILTIN_COMGTSD,
20759 IX86_BUILTIN_COMGESD,
20760 IX86_BUILTIN_COMUEQSD,
20761 IX86_BUILTIN_COMUNESD,
20762 IX86_BUILTIN_COMULTSD,
20763 IX86_BUILTIN_COMULESD,
20764 IX86_BUILTIN_COMUGTSD,
20765 IX86_BUILTIN_COMUGESD,
20766 IX86_BUILTIN_COMORDSD,
20767 IX86_BUILTIN_COMUNORDSD,
20768 IX86_BUILTIN_COMFALSESD,
20769 IX86_BUILTIN_COMTRUESD,
20771 IX86_BUILTIN_COMEQPS,
20772 IX86_BUILTIN_COMNEPS,
20773 IX86_BUILTIN_COMLTPS,
20774 IX86_BUILTIN_COMLEPS,
20775 IX86_BUILTIN_COMGTPS,
20776 IX86_BUILTIN_COMGEPS,
20777 IX86_BUILTIN_COMUEQPS,
20778 IX86_BUILTIN_COMUNEPS,
20779 IX86_BUILTIN_COMULTPS,
20780 IX86_BUILTIN_COMULEPS,
20781 IX86_BUILTIN_COMUGTPS,
20782 IX86_BUILTIN_COMUGEPS,
20783 IX86_BUILTIN_COMORDPS,
20784 IX86_BUILTIN_COMUNORDPS,
20785 IX86_BUILTIN_COMFALSEPS,
20786 IX86_BUILTIN_COMTRUEPS,
20788 IX86_BUILTIN_COMEQPD,
20789 IX86_BUILTIN_COMNEPD,
20790 IX86_BUILTIN_COMLTPD,
20791 IX86_BUILTIN_COMLEPD,
20792 IX86_BUILTIN_COMGTPD,
20793 IX86_BUILTIN_COMGEPD,
20794 IX86_BUILTIN_COMUEQPD,
20795 IX86_BUILTIN_COMUNEPD,
20796 IX86_BUILTIN_COMULTPD,
20797 IX86_BUILTIN_COMULEPD,
20798 IX86_BUILTIN_COMUGTPD,
20799 IX86_BUILTIN_COMUGEPD,
20800 IX86_BUILTIN_COMORDPD,
20801 IX86_BUILTIN_COMUNORDPD,
20802 IX86_BUILTIN_COMFALSEPD,
20803 IX86_BUILTIN_COMTRUEPD,
20805 IX86_BUILTIN_PCOMEQUB,
20806 IX86_BUILTIN_PCOMNEUB,
20807 IX86_BUILTIN_PCOMLTUB,
20808 IX86_BUILTIN_PCOMLEUB,
20809 IX86_BUILTIN_PCOMGTUB,
20810 IX86_BUILTIN_PCOMGEUB,
20811 IX86_BUILTIN_PCOMFALSEUB,
20812 IX86_BUILTIN_PCOMTRUEUB,
20813 IX86_BUILTIN_PCOMEQUW,
20814 IX86_BUILTIN_PCOMNEUW,
20815 IX86_BUILTIN_PCOMLTUW,
20816 IX86_BUILTIN_PCOMLEUW,
20817 IX86_BUILTIN_PCOMGTUW,
20818 IX86_BUILTIN_PCOMGEUW,
20819 IX86_BUILTIN_PCOMFALSEUW,
20820 IX86_BUILTIN_PCOMTRUEUW,
20821 IX86_BUILTIN_PCOMEQUD,
20822 IX86_BUILTIN_PCOMNEUD,
20823 IX86_BUILTIN_PCOMLTUD,
20824 IX86_BUILTIN_PCOMLEUD,
20825 IX86_BUILTIN_PCOMGTUD,
20826 IX86_BUILTIN_PCOMGEUD,
20827 IX86_BUILTIN_PCOMFALSEUD,
20828 IX86_BUILTIN_PCOMTRUEUD,
20829 IX86_BUILTIN_PCOMEQUQ,
20830 IX86_BUILTIN_PCOMNEUQ,
20831 IX86_BUILTIN_PCOMLTUQ,
20832 IX86_BUILTIN_PCOMLEUQ,
20833 IX86_BUILTIN_PCOMGTUQ,
20834 IX86_BUILTIN_PCOMGEUQ,
20835 IX86_BUILTIN_PCOMFALSEUQ,
20836 IX86_BUILTIN_PCOMTRUEUQ,
20838 IX86_BUILTIN_PCOMEQB,
20839 IX86_BUILTIN_PCOMNEB,
20840 IX86_BUILTIN_PCOMLTB,
20841 IX86_BUILTIN_PCOMLEB,
20842 IX86_BUILTIN_PCOMGTB,
20843 IX86_BUILTIN_PCOMGEB,
20844 IX86_BUILTIN_PCOMFALSEB,
20845 IX86_BUILTIN_PCOMTRUEB,
20846 IX86_BUILTIN_PCOMEQW,
20847 IX86_BUILTIN_PCOMNEW,
20848 IX86_BUILTIN_PCOMLTW,
20849 IX86_BUILTIN_PCOMLEW,
20850 IX86_BUILTIN_PCOMGTW,
20851 IX86_BUILTIN_PCOMGEW,
20852 IX86_BUILTIN_PCOMFALSEW,
20853 IX86_BUILTIN_PCOMTRUEW,
20854 IX86_BUILTIN_PCOMEQD,
20855 IX86_BUILTIN_PCOMNED,
20856 IX86_BUILTIN_PCOMLTD,
20857 IX86_BUILTIN_PCOMLED,
20858 IX86_BUILTIN_PCOMGTD,
20859 IX86_BUILTIN_PCOMGED,
20860 IX86_BUILTIN_PCOMFALSED,
20861 IX86_BUILTIN_PCOMTRUED,
20862 IX86_BUILTIN_PCOMEQQ,
20863 IX86_BUILTIN_PCOMNEQ,
20864 IX86_BUILTIN_PCOMLTQ,
20865 IX86_BUILTIN_PCOMLEQ,
20866 IX86_BUILTIN_PCOMGTQ,
20867 IX86_BUILTIN_PCOMGEQ,
20868 IX86_BUILTIN_PCOMFALSEQ,
20869 IX86_BUILTIN_PCOMTRUEQ,
20874 /* Table for the ix86 builtin decls. */
/* One slot per ix86_builtins enumerator.  A slot is NULL_TREE until the
   builtin is actually registered (see def_builtin, which stores either the
   decl or NULL_TREE for ISA-deferred builtins).  GTY(()) makes the array a
   garbage-collector root so the tree nodes stay live.  */
20875 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20877 /* Table of all of the builtin functions that are possible with different ISA's
20878    but are waiting to be built until a function is declared to use that
/* Per-builtin record used by def_builtin to defer registration: when a
   builtin's ISA is not enabled, its NAME/TYPE/ISA are parked here and the
   decl is created later by ix86_add_new_builtins once the ISA is enabled.
   NOTE(review): the struct's brace lines appear to have been dropped by the
   extraction of this file chunk.  */
20880 struct builtin_isa GTY(())
20882   tree type;			/* builtin type to use in the declaration */
20883   const char *name;		/* function name */
20884   int isa;			/* isa_flags this builtin is defined for */
20885   bool const_p;			/* true if the declaration is constant */
/* Deferred-registration table, indexed by ix86_builtins code; consumed by
   ix86_add_new_builtins.  */
20888 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20891 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
20892  * of which isa_flags to use in the ix86_builtins_isa array.  Stores the
20893  * function decl in the ix86_builtins array.  Returns the function decl or
20894  * NULL_TREE, if the builtin was not added.
20896  * If the front end has a special hook for builtin functions, delay adding
20897  * builtin functions that aren't in the current ISA until the ISA is changed
20898  * with function specific optimization.  Doing so, can save about 300K for the
20899  * default compiler.  When the builtin is expanded, check at that time whether
20902  * If the front end doesn't have a special hook, record all builtins, even if
20903  * it isn't an instruction set in the current ISA in case the user uses
20904  * function specific options for a different ISA, so that we don't get scope
20905  * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): this extract has dropped the return-type line, the braces and
   the final return statement of this function; the visible statements below
   are kept byte-identical.  */
20908 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20910 tree decl = NULL_TREE;
/* 64-bit-only builtins are skipped entirely when not targeting 64-bit.  */
20912 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20914 ix86_builtins_isa[(int) code].isa = mask;
/* Register now if the ISA is already enabled, or if the front end lacks an
   extended-scope hook (so deferral is impossible).  */
20916 if ((mask & ix86_isa_flags) != 0
20917 || (lang_hooks.builtin_function
20918 == lang_hooks.builtin_function_ext_scope))
20921 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20923 ix86_builtins[(int) code] = decl;
20924 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: remember NAME/TYPE in ix86_builtins_isa so that
   ix86_add_new_builtins can create the decl once the ISA is enabled.  */
20928 ix86_builtins[(int) code] = NULL_TREE;
20929 ix86_builtins_isa[(int) code].const_p = false;
20930 ix86_builtins_isa[(int) code].type = type;
20931 ix86_builtins_isa[(int) code].name = name;
20938 /* Like def_builtin, but also marks the function decl "const". */
/* "const" here means the builtin has no side effects (TREE_READONLY), which
   lets the optimizers CSE and hoist calls to it.  */
20941 def_builtin_const (int mask, const char *name, tree type,
20942 enum ix86_builtins code)
20944 tree decl = def_builtin (mask, name, type, code);
/* NOTE(review): def_builtin may return NULL_TREE (deferred builtin); the
   conditional lines guarding this pair of statements (if (decl) ... else ...)
   appear to have been dropped by the extraction — confirm against upstream.  */
20946 TREE_READONLY (decl) = 1;
/* Deferred case: remember const-ness so ix86_add_new_builtins can apply
   TREE_READONLY when the decl is eventually built.  */
20948 ix86_builtins_isa[(int) code].const_p = true;
20953 /* Add any new builtin functions for a given ISA that may not have been
20954    declared.  This saves a bit of space compared to adding all of the
20955    declarations to the tree, even if we didn't use them. */
/* Walks the deferred-registration table filled by def_builtin and creates
   decls (at extended/file scope) for every builtin whose ISA bits intersect
   ISA and that has not been built yet (type != NULL_TREE).
   NOTE(review): braces and some declaration lines are elided in this
   extract; visible statements kept byte-identical.  */
20958 ix86_add_new_builtins (int isa)
20963 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20965 if ((ix86_builtins_isa[i].isa & isa) != 0
20966 && ix86_builtins_isa[i].type != NULL_TREE)
20968 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20969 ix86_builtins_isa[i].type,
20970 i, BUILT_IN_MD, NULL,
20973 ix86_builtins[i] = decl;
/* Clearing type marks the slot as "already built" for later calls.  */
20974 ix86_builtins_isa[i].type = NULL_TREE;
20975 if (ix86_builtins_isa[i].const_p)
20976 TREE_READONLY (decl) = 1;
20981 /* Bits for builtin_description.flag. */
20983 /* Set when we don't support the comparison natively, and should
20984    swap_comparison in order to support it. */
20985 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below (bdesc_comi, bdesc_pcmpestr, ...):
   ISA mask gating the row, the insn pattern to expand to, the user-visible
   __builtin_ia32_* name, the ix86_builtins code, and an rtx comparison code
   (UNKNOWN when not a comparison).
   NOTE(review): the trailing flag field and closing brace of this struct
   appear to have been dropped by the extraction.  */
20987 struct builtin_description
20989 const unsigned int mask;
20990 const enum insn_code icode;
20991 const char *const name;
20992 const enum ix86_builtins code;
20993 const enum rtx_code comparison;
/* Scalar ordered/unordered compare builtins: comiss/ucomiss (SSE) and
   comisd/ucomisd (SSE2).  The rtx codes UNEQ/UNLT/UNLE/GT/GE/LTGT encode the
   EFLAGS condition the expander tests after the compare.  */
20997 static const struct builtin_description bdesc_comi[] =
20999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm (explicit-length string compare) builtins.
   The last field selects which EFLAGS bit the flag-reading variants return
   (CCA/CCC/CCO/CCS/CCZ); 0 for the index/mask-producing forms.  */
21025 static const struct builtin_description bdesc_pcmpestr[] =
21028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21029 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21030 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21031 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21032 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21033 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21034 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm (implicit-length string compare) builtins;
   same flag-selection scheme as bdesc_pcmpestr above.  */
21037 static const struct builtin_description bdesc_pcmpistr[] =
21040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21042 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21044 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21045 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21046 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21049 /* Special builtin types */
/* Function-signature tags for builtins with pointer arguments / side effects
   (loads, stores, masked moves).  Naming scheme: RET_FTYPE_ARGS, with
   PC = pointer-to-const, P = pointer.  Used as the flag field of rows in
   bdesc_special_args.  */
21050 enum ix86_special_builtin_type
21052 SPECIAL_FTYPE_UNKNOWN,
21054 V32QI_FTYPE_PCCHAR,
21055 V16QI_FTYPE_PCCHAR,
21057 V8SF_FTYPE_PCFLOAT,
21059 V4DF_FTYPE_PCDOUBLE,
21060 V4SF_FTYPE_PCFLOAT,
21061 V2DF_FTYPE_PCDOUBLE,
21062 V8SF_FTYPE_PCV8SF_V8SF,
21063 V4DF_FTYPE_PCV4DF_V4DF,
21064 V4SF_FTYPE_V4SF_PCV2SF,
21065 V4SF_FTYPE_PCV4SF_V4SF,
21066 V2DF_FTYPE_V2DF_PCDOUBLE,
21067 V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style signatures: VOID return, destination pointer first.  */
21069 VOID_FTYPE_PV2SF_V4SF,
21070 VOID_FTYPE_PV4DI_V4DI,
21071 VOID_FTYPE_PV2DI_V2DI,
21072 VOID_FTYPE_PCHAR_V32QI,
21073 VOID_FTYPE_PCHAR_V16QI,
21074 VOID_FTYPE_PFLOAT_V8SF,
21075 VOID_FTYPE_PFLOAT_V4SF,
21076 VOID_FTYPE_PDOUBLE_V4DF,
21077 VOID_FTYPE_PDOUBLE_V2DF,
21079 VOID_FTYPE_PINT_INT,
21080 VOID_FTYPE_PV8SF_V8SF_V8SF,
21081 VOID_FTYPE_PV4DF_V4DF_V4DF,
21082 VOID_FTYPE_PV4SF_V4SF_V4SF,
21083 VOID_FTYPE_PV2DF_V2DF_V2DF
21086 /* Builtin types */
/* Function-signature tags for the ordinary (value-only) builtins, used as
   the flag field of builtin_description rows.  Scheme: RET_FTYPE_ARGS.
   Suffixes: _COUNT = last operand is a shift count, _SWAP = operands are
   swapped before expansion, _PTEST = ptest-style flag result, _VEC_MERGE =
   scalar op merged into the destination vector.
   NOTE(review): several enumerator lines are elided in this extract
   (original-line jumps, e.g. 21098 -> 21130); visible lines kept verbatim.  */
21087 enum ix86_builtin_type
21090 FLOAT128_FTYPE_FLOAT128,
21092 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21093 INT_FTYPE_V8SF_V8SF_PTEST,
21094 INT_FTYPE_V4DI_V4DI_PTEST,
21095 INT_FTYPE_V4DF_V4DF_PTEST,
21096 INT_FTYPE_V4SF_V4SF_PTEST,
21097 INT_FTYPE_V2DI_V2DI_PTEST,
21098 INT_FTYPE_V2DF_V2DF_PTEST,
21130 V4SF_FTYPE_V4SF_VEC_MERGE,
21139 V2DF_FTYPE_V2DF_VEC_MERGE,
21150 V16QI_FTYPE_V16QI_V16QI,
21151 V16QI_FTYPE_V8HI_V8HI,
21152 V8QI_FTYPE_V8QI_V8QI,
21153 V8QI_FTYPE_V4HI_V4HI,
21154 V8HI_FTYPE_V8HI_V8HI,
21155 V8HI_FTYPE_V8HI_V8HI_COUNT,
21156 V8HI_FTYPE_V16QI_V16QI,
21157 V8HI_FTYPE_V4SI_V4SI,
21158 V8HI_FTYPE_V8HI_SI_COUNT,
21159 V8SF_FTYPE_V8SF_V8SF,
21160 V8SF_FTYPE_V8SF_V8SI,
21161 V4SI_FTYPE_V4SI_V4SI,
21162 V4SI_FTYPE_V4SI_V4SI_COUNT,
21163 V4SI_FTYPE_V8HI_V8HI,
21164 V4SI_FTYPE_V4SF_V4SF,
21165 V4SI_FTYPE_V2DF_V2DF,
21166 V4SI_FTYPE_V4SI_SI_COUNT,
21167 V4HI_FTYPE_V4HI_V4HI,
21168 V4HI_FTYPE_V4HI_V4HI_COUNT,
21169 V4HI_FTYPE_V8QI_V8QI,
21170 V4HI_FTYPE_V2SI_V2SI,
21171 V4HI_FTYPE_V4HI_SI_COUNT,
21172 V4DF_FTYPE_V4DF_V4DF,
21173 V4DF_FTYPE_V4DF_V4DI,
21174 V4SF_FTYPE_V4SF_V4SF,
21175 V4SF_FTYPE_V4SF_V4SF_SWAP,
21176 V4SF_FTYPE_V4SF_V4SI,
21177 V4SF_FTYPE_V4SF_V2SI,
21178 V4SF_FTYPE_V4SF_V2DF,
21179 V4SF_FTYPE_V4SF_DI,
21180 V4SF_FTYPE_V4SF_SI,
21181 V2DI_FTYPE_V2DI_V2DI,
21182 V2DI_FTYPE_V2DI_V2DI_COUNT,
21183 V2DI_FTYPE_V16QI_V16QI,
21184 V2DI_FTYPE_V4SI_V4SI,
21185 V2DI_FTYPE_V2DI_V16QI,
21186 V2DI_FTYPE_V2DF_V2DF,
21187 V2DI_FTYPE_V2DI_SI_COUNT,
21188 V2SI_FTYPE_V2SI_V2SI,
21189 V2SI_FTYPE_V2SI_V2SI_COUNT,
21190 V2SI_FTYPE_V4HI_V4HI,
21191 V2SI_FTYPE_V2SF_V2SF,
21192 V2SI_FTYPE_V2SI_SI_COUNT,
21193 V2DF_FTYPE_V2DF_V2DF,
21194 V2DF_FTYPE_V2DF_V2DF_SWAP,
21195 V2DF_FTYPE_V2DF_V4SF,
21196 V2DF_FTYPE_V2DF_V2DI,
21197 V2DF_FTYPE_V2DF_DI,
21198 V2DF_FTYPE_V2DF_SI,
21199 V2SF_FTYPE_V2SF_V2SF,
21200 V1DI_FTYPE_V1DI_V1DI,
21201 V1DI_FTYPE_V1DI_V1DI_COUNT,
21202 V1DI_FTYPE_V8QI_V8QI,
21203 V1DI_FTYPE_V2SI_V2SI,
21204 V1DI_FTYPE_V1DI_SI_COUNT,
21205 UINT64_FTYPE_UINT64_UINT64,
21206 UINT_FTYPE_UINT_UINT,
21207 UINT_FTYPE_UINT_USHORT,
21208 UINT_FTYPE_UINT_UCHAR,
/* Two-operand forms with an immediate (INT) last operand.  */
21209 V8HI_FTYPE_V8HI_INT,
21210 V4SI_FTYPE_V4SI_INT,
21211 V4HI_FTYPE_V4HI_INT,
21212 V8SF_FTYPE_V8SF_INT,
21213 V4SI_FTYPE_V8SI_INT,
21214 V4SF_FTYPE_V8SF_INT,
21215 V2DF_FTYPE_V4DF_INT,
21216 V4DF_FTYPE_V4DF_INT,
21217 V4SF_FTYPE_V4SF_INT,
21218 V2DI_FTYPE_V2DI_INT,
21219 V2DI2TI_FTYPE_V2DI_INT,
21220 V2DF_FTYPE_V2DF_INT,
/* Three-operand forms (e.g. blends, FMA-style ops).  */
21221 V16QI_FTYPE_V16QI_V16QI_V16QI,
21222 V8SF_FTYPE_V8SF_V8SF_V8SF,
21223 V4DF_FTYPE_V4DF_V4DF_V4DF,
21224 V4SF_FTYPE_V4SF_V4SF_V4SF,
21225 V2DF_FTYPE_V2DF_V2DF_V2DF,
21226 V16QI_FTYPE_V16QI_V16QI_INT,
21227 V8SI_FTYPE_V8SI_V8SI_INT,
21228 V8SI_FTYPE_V8SI_V4SI_INT,
21229 V8HI_FTYPE_V8HI_V8HI_INT,
21230 V8SF_FTYPE_V8SF_V8SF_INT,
21231 V8SF_FTYPE_V8SF_V4SF_INT,
21232 V4SI_FTYPE_V4SI_V4SI_INT,
21233 V4DF_FTYPE_V4DF_V4DF_INT,
21234 V4DF_FTYPE_V4DF_V2DF_INT,
21235 V4SF_FTYPE_V4SF_V4SF_INT,
21236 V2DI_FTYPE_V2DI_V2DI_INT,
21237 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21238 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21239 V2DF_FTYPE_V2DF_V2DF_INT,
21240 V2DI_FTYPE_V2DI_UINT_UINT,
21241 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21244 /* Special builtins with variable number of arguments. */
/* Each entry is a builtin_description:
     { isa-mask, insn code, user-visible name, IX86_BUILTIN_* id,
       rtx comparison code (UNKNOWN when unused), (int) function type }.
   "Special" builtins are those taking pointer arguments or having
   memory side effects (loads, stores, fences, non-temporal moves).
   NOTE(review): entries whose name field is 0 (mfence, vzeroupper)
   presumably get registered through a separate code path elsewhere in
   this file - confirm against the builtin-init logic.  */
21245 static const struct builtin_description bdesc_special_args[] =
/* MMX.  */
21248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow!  */
21251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE.  */
21254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21263 /* SSE or 3DNow!A */
21264 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21265 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2.  */
21268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3.  */
21282 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1.  */
21285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A.  */
21288 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21289 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX.  */
21292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21294 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21302 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21305 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21314 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21324 /* Builtins with variable number of arguments. */
21325 static const struct builtin_description bdesc_args[] =
21328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21392 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21393 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21394 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21395 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21397 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21398 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21399 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21400 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21401 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21402 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21403 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21404 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21406 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21407 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21409 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21411 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21414 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21415 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21416 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21417 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21418 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21419 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21424 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21426 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21430 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21433 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21437 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21438 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21439 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21440 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21441 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21469 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21470 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21474 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21487 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21495 /* SSE MMX or 3Dnow!A */
21496 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21497 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21498 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21500 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21501 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21502 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21503 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21505 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21506 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21508 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21529 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21530 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21536 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21537 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21538 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21539 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21571 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21618 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21636 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21645 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21650 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21659 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21663 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21679   /* MOVQ xmm,xmm (sse2_movq128, V2DI) is an SSE2 instruction; gate on
21679      OPTION_MASK_ISA_SSE2 like every other sse2_* entry in this table, so
21679      the builtin is not exposed on plain-SSE targets lacking the insn.  */
21679   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21686 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21687 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21689 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21690 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21691 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21692 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21693 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21694 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21697 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21698 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21701 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21702 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21772 /* SSE4.1 and SSE5 */
21773 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21774 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21775 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21776 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21778 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21779 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21780 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21783 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21784 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21785 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21786 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21787 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21790 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21791 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21792 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21793 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21800 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21805 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21808 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21809 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21812 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21813 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21816 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21822 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21823 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21824 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21825 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21826 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21827 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21828 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21829 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21830 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21831 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21832 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21833 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classification of SSE5 multi-argument builtins: encodes operand count
   and element modes (e.g. MULTI_ARG_2_SF_CMP = two-operand single-float
   comparison).  Used as the flag field of bdesc_multi_arg entries below.
   NOTE(review): the original-file line numbers jump (21923 -> 21933 ->
   21941), so members are elided in this listing; the enum's closing
   brace is also not visible here.  */
21923 enum multi_arg_type {
21933 MULTI_ARG_3_PERMPS,
21934 MULTI_ARG_3_PERMPD,
/* Two-operand forms taking an immediate second operand.  */
21941 MULTI_ARG_2_DI_IMM,
21942 MULTI_ARG_2_SI_IMM,
21943 MULTI_ARG_2_HI_IMM,
21944 MULTI_ARG_2_QI_IMM,
/* Two-operand comparison forms, per element mode.  */
21945 MULTI_ARG_2_SF_CMP,
21946 MULTI_ARG_2_DF_CMP,
21947 MULTI_ARG_2_DI_CMP,
21948 MULTI_ARG_2_SI_CMP,
21949 MULTI_ARG_2_HI_CMP,
21950 MULTI_ARG_2_QI_CMP,
/* Descriptions of SSE5 (AMD) multi-argument builtins.  Each row: ISA
   mask, insn code, builtin name, builtin enum, comparison code (or 0
   when unused), and a multi_arg_type flag describing the operands.
   This table continues past the end of this listing.  */
21973 static const struct builtin_description bdesc_multi_arg[] =
/* Fused multiply-add/subtract, scalar (vm*) and packed forms.  */
21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
/* PCMOV (vector conditional move).  The generic "pcmov" name reuses the
   v2di expander -- the element width is irrelevant for a bitwise select.  */
21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
/* Byte permute and float permutes.  */
21998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
/* PMACS*: integer multiply-accumulate (ss = signed saturating).  */
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
/* Rotates: vrotl* take a vector count, rotl* take an immediate.  */
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
/* Per-element arithmetic (psha*) and logical (pshl*) variable shifts.  */
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
/* FRCZ (extract fraction), half-float conversion, horizontal add/sub.  */
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
/* Scalar single-float comparisons.  Note the "neq"/"une" spellings reuse
   the builtin code of their sibling ("ne"/"une") entry -- presumably
   deliberate name aliases, since both rows carry the same RTX code;
   verify against upstream before changing.  */
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Scalar double-float comparisons (same alias pattern as above).  */
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* Packed single-float comparisons.  */
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Packed double-float comparisons (table continues beyond this listing).  */
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22211 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22212 in the current target ISA to allow the user to compile particular modules
22213 with different target specific options that differ from the command line
22216 ix86_init_mmx_sse_builtins (void)
22218 const struct builtin_description * d;
22221 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22222 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22223 tree V1DI_type_node
22224 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22225 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22226 tree V2DI_type_node
22227 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22228 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22229 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22230 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22231 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22232 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22233 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22235 tree pchar_type_node = build_pointer_type (char_type_node);
22236 tree pcchar_type_node
22237 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22238 tree pfloat_type_node = build_pointer_type (float_type_node);
22239 tree pcfloat_type_node
22240 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22241 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22242 tree pcv2sf_type_node
22243 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22244 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22245 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22248 tree int_ftype_v4sf_v4sf
22249 = build_function_type_list (integer_type_node,
22250 V4SF_type_node, V4SF_type_node, NULL_TREE);
22251 tree v4si_ftype_v4sf_v4sf
22252 = build_function_type_list (V4SI_type_node,
22253 V4SF_type_node, V4SF_type_node, NULL_TREE);
22254 /* MMX/SSE/integer conversions. */
22255 tree int_ftype_v4sf
22256 = build_function_type_list (integer_type_node,
22257 V4SF_type_node, NULL_TREE);
22258 tree int64_ftype_v4sf
22259 = build_function_type_list (long_long_integer_type_node,
22260 V4SF_type_node, NULL_TREE);
22261 tree int_ftype_v8qi
22262 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22263 tree v4sf_ftype_v4sf_int
22264 = build_function_type_list (V4SF_type_node,
22265 V4SF_type_node, integer_type_node, NULL_TREE);
22266 tree v4sf_ftype_v4sf_int64
22267 = build_function_type_list (V4SF_type_node,
22268 V4SF_type_node, long_long_integer_type_node,
22270 tree v4sf_ftype_v4sf_v2si
22271 = build_function_type_list (V4SF_type_node,
22272 V4SF_type_node, V2SI_type_node, NULL_TREE);
22274 /* Miscellaneous. */
22275 tree v8qi_ftype_v4hi_v4hi
22276 = build_function_type_list (V8QI_type_node,
22277 V4HI_type_node, V4HI_type_node, NULL_TREE);
22278 tree v4hi_ftype_v2si_v2si
22279 = build_function_type_list (V4HI_type_node,
22280 V2SI_type_node, V2SI_type_node, NULL_TREE);
22281 tree v4sf_ftype_v4sf_v4sf_int
22282 = build_function_type_list (V4SF_type_node,
22283 V4SF_type_node, V4SF_type_node,
22284 integer_type_node, NULL_TREE);
22285 tree v2si_ftype_v4hi_v4hi
22286 = build_function_type_list (V2SI_type_node,
22287 V4HI_type_node, V4HI_type_node, NULL_TREE);
22288 tree v4hi_ftype_v4hi_int
22289 = build_function_type_list (V4HI_type_node,
22290 V4HI_type_node, integer_type_node, NULL_TREE);
22291 tree v2si_ftype_v2si_int
22292 = build_function_type_list (V2SI_type_node,
22293 V2SI_type_node, integer_type_node, NULL_TREE);
22294 tree v1di_ftype_v1di_int
22295 = build_function_type_list (V1DI_type_node,
22296 V1DI_type_node, integer_type_node, NULL_TREE);
22298 tree void_ftype_void
22299 = build_function_type (void_type_node, void_list_node);
22300 tree void_ftype_unsigned
22301 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22302 tree void_ftype_unsigned_unsigned
22303 = build_function_type_list (void_type_node, unsigned_type_node,
22304 unsigned_type_node, NULL_TREE);
22305 tree void_ftype_pcvoid_unsigned_unsigned
22306 = build_function_type_list (void_type_node, const_ptr_type_node,
22307 unsigned_type_node, unsigned_type_node,
22309 tree unsigned_ftype_void
22310 = build_function_type (unsigned_type_node, void_list_node);
22311 tree v2si_ftype_v4sf
22312 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22313 /* Loads/stores. */
22314 tree void_ftype_v8qi_v8qi_pchar
22315 = build_function_type_list (void_type_node,
22316 V8QI_type_node, V8QI_type_node,
22317 pchar_type_node, NULL_TREE);
22318 tree v4sf_ftype_pcfloat
22319 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22320 tree v4sf_ftype_v4sf_pcv2sf
22321 = build_function_type_list (V4SF_type_node,
22322 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22323 tree void_ftype_pv2sf_v4sf
22324 = build_function_type_list (void_type_node,
22325 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22326 tree void_ftype_pfloat_v4sf
22327 = build_function_type_list (void_type_node,
22328 pfloat_type_node, V4SF_type_node, NULL_TREE);
22329 tree void_ftype_pdi_di
22330 = build_function_type_list (void_type_node,
22331 pdi_type_node, long_long_unsigned_type_node,
22333 tree void_ftype_pv2di_v2di
22334 = build_function_type_list (void_type_node,
22335 pv2di_type_node, V2DI_type_node, NULL_TREE);
22336 /* Normal vector unops. */
22337 tree v4sf_ftype_v4sf
22338 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22339 tree v16qi_ftype_v16qi
22340 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22341 tree v8hi_ftype_v8hi
22342 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22343 tree v4si_ftype_v4si
22344 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22345 tree v8qi_ftype_v8qi
22346 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22347 tree v4hi_ftype_v4hi
22348 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22350 /* Normal vector binops. */
22351 tree v4sf_ftype_v4sf_v4sf
22352 = build_function_type_list (V4SF_type_node,
22353 V4SF_type_node, V4SF_type_node, NULL_TREE);
22354 tree v8qi_ftype_v8qi_v8qi
22355 = build_function_type_list (V8QI_type_node,
22356 V8QI_type_node, V8QI_type_node, NULL_TREE);
22357 tree v4hi_ftype_v4hi_v4hi
22358 = build_function_type_list (V4HI_type_node,
22359 V4HI_type_node, V4HI_type_node, NULL_TREE);
22360 tree v2si_ftype_v2si_v2si
22361 = build_function_type_list (V2SI_type_node,
22362 V2SI_type_node, V2SI_type_node, NULL_TREE);
22363 tree v1di_ftype_v1di_v1di
22364 = build_function_type_list (V1DI_type_node,
22365 V1DI_type_node, V1DI_type_node, NULL_TREE);
22366 tree v1di_ftype_v1di_v1di_int
22367 = build_function_type_list (V1DI_type_node,
22368 V1DI_type_node, V1DI_type_node,
22369 integer_type_node, NULL_TREE);
22370 tree v2si_ftype_v2sf
22371 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22372 tree v2sf_ftype_v2si
22373 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22374 tree v2si_ftype_v2si
22375 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22376 tree v2sf_ftype_v2sf
22377 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22378 tree v2sf_ftype_v2sf_v2sf
22379 = build_function_type_list (V2SF_type_node,
22380 V2SF_type_node, V2SF_type_node, NULL_TREE);
22381 tree v2si_ftype_v2sf_v2sf
22382 = build_function_type_list (V2SI_type_node,
22383 V2SF_type_node, V2SF_type_node, NULL_TREE);
22384 tree pint_type_node = build_pointer_type (integer_type_node);
22385 tree pdouble_type_node = build_pointer_type (double_type_node);
22386 tree pcdouble_type_node = build_pointer_type (
22387 build_type_variant (double_type_node, 1, 0));
22388 tree int_ftype_v2df_v2df
22389 = build_function_type_list (integer_type_node,
22390 V2DF_type_node, V2DF_type_node, NULL_TREE);
22392 tree void_ftype_pcvoid
22393 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22394 tree v4sf_ftype_v4si
22395 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22396 tree v4si_ftype_v4sf
22397 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22398 tree v2df_ftype_v4si
22399 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22400 tree v4si_ftype_v2df
22401 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22402 tree v4si_ftype_v2df_v2df
22403 = build_function_type_list (V4SI_type_node,
22404 V2DF_type_node, V2DF_type_node, NULL_TREE);
22405 tree v2si_ftype_v2df
22406 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22407 tree v4sf_ftype_v2df
22408 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22409 tree v2df_ftype_v2si
22410 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22411 tree v2df_ftype_v4sf
22412 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22413 tree int_ftype_v2df
22414 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22415 tree int64_ftype_v2df
22416 = build_function_type_list (long_long_integer_type_node,
22417 V2DF_type_node, NULL_TREE);
22418 tree v2df_ftype_v2df_int
22419 = build_function_type_list (V2DF_type_node,
22420 V2DF_type_node, integer_type_node, NULL_TREE);
22421 tree v2df_ftype_v2df_int64
22422 = build_function_type_list (V2DF_type_node,
22423 V2DF_type_node, long_long_integer_type_node,
22425 tree v4sf_ftype_v4sf_v2df
22426 = build_function_type_list (V4SF_type_node,
22427 V4SF_type_node, V2DF_type_node, NULL_TREE);
22428 tree v2df_ftype_v2df_v4sf
22429 = build_function_type_list (V2DF_type_node,
22430 V2DF_type_node, V4SF_type_node, NULL_TREE);
22431 tree v2df_ftype_v2df_v2df_int
22432 = build_function_type_list (V2DF_type_node,
22433 V2DF_type_node, V2DF_type_node,
22436 tree v2df_ftype_v2df_pcdouble
22437 = build_function_type_list (V2DF_type_node,
22438 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22439 tree void_ftype_pdouble_v2df
22440 = build_function_type_list (void_type_node,
22441 pdouble_type_node, V2DF_type_node, NULL_TREE);
22442 tree void_ftype_pint_int
22443 = build_function_type_list (void_type_node,
22444 pint_type_node, integer_type_node, NULL_TREE);
22445 tree void_ftype_v16qi_v16qi_pchar
22446 = build_function_type_list (void_type_node,
22447 V16QI_type_node, V16QI_type_node,
22448 pchar_type_node, NULL_TREE);
22449 tree v2df_ftype_pcdouble
22450 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22451 tree v2df_ftype_v2df_v2df
22452 = build_function_type_list (V2DF_type_node,
22453 V2DF_type_node, V2DF_type_node, NULL_TREE);
22454 tree v16qi_ftype_v16qi_v16qi
22455 = build_function_type_list (V16QI_type_node,
22456 V16QI_type_node, V16QI_type_node, NULL_TREE);
22457 tree v8hi_ftype_v8hi_v8hi
22458 = build_function_type_list (V8HI_type_node,
22459 V8HI_type_node, V8HI_type_node, NULL_TREE);
22460 tree v4si_ftype_v4si_v4si
22461 = build_function_type_list (V4SI_type_node,
22462 V4SI_type_node, V4SI_type_node, NULL_TREE);
22463 tree v2di_ftype_v2di_v2di
22464 = build_function_type_list (V2DI_type_node,
22465 V2DI_type_node, V2DI_type_node, NULL_TREE);
22466 tree v2di_ftype_v2df_v2df
22467 = build_function_type_list (V2DI_type_node,
22468 V2DF_type_node, V2DF_type_node, NULL_TREE);
22469 tree v2df_ftype_v2df
22470 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22471 tree v2di_ftype_v2di_int
22472 = build_function_type_list (V2DI_type_node,
22473 V2DI_type_node, integer_type_node, NULL_TREE);
22474 tree v2di_ftype_v2di_v2di_int
22475 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22476 V2DI_type_node, integer_type_node, NULL_TREE);
22477 tree v4si_ftype_v4si_int
22478 = build_function_type_list (V4SI_type_node,
22479 V4SI_type_node, integer_type_node, NULL_TREE);
22480 tree v8hi_ftype_v8hi_int
22481 = build_function_type_list (V8HI_type_node,
22482 V8HI_type_node, integer_type_node, NULL_TREE);
22483 tree v4si_ftype_v8hi_v8hi
22484 = build_function_type_list (V4SI_type_node,
22485 V8HI_type_node, V8HI_type_node, NULL_TREE);
22486 tree v1di_ftype_v8qi_v8qi
22487 = build_function_type_list (V1DI_type_node,
22488 V8QI_type_node, V8QI_type_node, NULL_TREE);
22489 tree v1di_ftype_v2si_v2si
22490 = build_function_type_list (V1DI_type_node,
22491 V2SI_type_node, V2SI_type_node, NULL_TREE);
22492 tree v2di_ftype_v16qi_v16qi
22493 = build_function_type_list (V2DI_type_node,
22494 V16QI_type_node, V16QI_type_node, NULL_TREE);
22495 tree v2di_ftype_v4si_v4si
22496 = build_function_type_list (V2DI_type_node,
22497 V4SI_type_node, V4SI_type_node, NULL_TREE);
22498 tree int_ftype_v16qi
22499 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22500 tree v16qi_ftype_pcchar
22501 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22502 tree void_ftype_pchar_v16qi
22503 = build_function_type_list (void_type_node,
22504 pchar_type_node, V16QI_type_node, NULL_TREE);
22506 tree v2di_ftype_v2di_unsigned_unsigned
22507 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22508 unsigned_type_node, unsigned_type_node,
22510 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22511 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22512 unsigned_type_node, unsigned_type_node,
22514 tree v2di_ftype_v2di_v16qi
22515 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22517 tree v2df_ftype_v2df_v2df_v2df
22518 = build_function_type_list (V2DF_type_node,
22519 V2DF_type_node, V2DF_type_node,
22520 V2DF_type_node, NULL_TREE);
22521 tree v4sf_ftype_v4sf_v4sf_v4sf
22522 = build_function_type_list (V4SF_type_node,
22523 V4SF_type_node, V4SF_type_node,
22524 V4SF_type_node, NULL_TREE);
22525 tree v8hi_ftype_v16qi
22526 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22528 tree v4si_ftype_v16qi
22529 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22531 tree v2di_ftype_v16qi
22532 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22534 tree v4si_ftype_v8hi
22535 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22537 tree v2di_ftype_v8hi
22538 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22540 tree v2di_ftype_v4si
22541 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22543 tree v2di_ftype_pv2di
22544 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22546 tree v16qi_ftype_v16qi_v16qi_int
22547 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22548 V16QI_type_node, integer_type_node,
22550 tree v16qi_ftype_v16qi_v16qi_v16qi
22551 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22552 V16QI_type_node, V16QI_type_node,
22554 tree v8hi_ftype_v8hi_v8hi_int
22555 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22556 V8HI_type_node, integer_type_node,
22558 tree v4si_ftype_v4si_v4si_int
22559 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22560 V4SI_type_node, integer_type_node,
22562 tree int_ftype_v2di_v2di
22563 = build_function_type_list (integer_type_node,
22564 V2DI_type_node, V2DI_type_node,
22566 tree int_ftype_v16qi_int_v16qi_int_int
22567 = build_function_type_list (integer_type_node,
22574 tree v16qi_ftype_v16qi_int_v16qi_int_int
22575 = build_function_type_list (V16QI_type_node,
22582 tree int_ftype_v16qi_v16qi_int
22583 = build_function_type_list (integer_type_node,
22589 /* SSE5 instructions */
22590 tree v2di_ftype_v2di_v2di_v2di
22591 = build_function_type_list (V2DI_type_node,
22597 tree v4si_ftype_v4si_v4si_v4si
22598 = build_function_type_list (V4SI_type_node,
22604 tree v4si_ftype_v4si_v4si_v2di
22605 = build_function_type_list (V4SI_type_node,
22611 tree v8hi_ftype_v8hi_v8hi_v8hi
22612 = build_function_type_list (V8HI_type_node,
22618 tree v8hi_ftype_v8hi_v8hi_v4si
22619 = build_function_type_list (V8HI_type_node,
22625 tree v2df_ftype_v2df_v2df_v16qi
22626 = build_function_type_list (V2DF_type_node,
22632 tree v4sf_ftype_v4sf_v4sf_v16qi
22633 = build_function_type_list (V4SF_type_node,
22639 tree v2di_ftype_v2di_si
22640 = build_function_type_list (V2DI_type_node,
22645 tree v4si_ftype_v4si_si
22646 = build_function_type_list (V4SI_type_node,
22651 tree v8hi_ftype_v8hi_si
22652 = build_function_type_list (V8HI_type_node,
22657 tree v16qi_ftype_v16qi_si
22658 = build_function_type_list (V16QI_type_node,
22662 tree v4sf_ftype_v4hi
22663 = build_function_type_list (V4SF_type_node,
22667 tree v4hi_ftype_v4sf
22668 = build_function_type_list (V4HI_type_node,
22672 tree v2di_ftype_v2di
22673 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22675 tree v16qi_ftype_v8hi_v8hi
22676 = build_function_type_list (V16QI_type_node,
22677 V8HI_type_node, V8HI_type_node,
22679 tree v8hi_ftype_v4si_v4si
22680 = build_function_type_list (V8HI_type_node,
22681 V4SI_type_node, V4SI_type_node,
22683 tree v8hi_ftype_v16qi_v16qi
22684 = build_function_type_list (V8HI_type_node,
22685 V16QI_type_node, V16QI_type_node,
22687 tree v4hi_ftype_v8qi_v8qi
22688 = build_function_type_list (V4HI_type_node,
22689 V8QI_type_node, V8QI_type_node,
22691 tree unsigned_ftype_unsigned_uchar
22692 = build_function_type_list (unsigned_type_node,
22693 unsigned_type_node,
22694 unsigned_char_type_node,
22696 tree unsigned_ftype_unsigned_ushort
22697 = build_function_type_list (unsigned_type_node,
22698 unsigned_type_node,
22699 short_unsigned_type_node,
22701 tree unsigned_ftype_unsigned_unsigned
22702 = build_function_type_list (unsigned_type_node,
22703 unsigned_type_node,
22704 unsigned_type_node,
22706 tree uint64_ftype_uint64_uint64
22707 = build_function_type_list (long_long_unsigned_type_node,
22708 long_long_unsigned_type_node,
22709 long_long_unsigned_type_node,
22711 tree float_ftype_float
22712 = build_function_type_list (float_type_node,
22717 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22719 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22721 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22723 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22725 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22727 tree v8sf_ftype_v8sf
22728 = build_function_type_list (V8SF_type_node,
22731 tree v8si_ftype_v8sf
22732 = build_function_type_list (V8SI_type_node,
22735 tree v8sf_ftype_v8si
22736 = build_function_type_list (V8SF_type_node,
22739 tree v4si_ftype_v4df
22740 = build_function_type_list (V4SI_type_node,
22743 tree v4df_ftype_v4df
22744 = build_function_type_list (V4DF_type_node,
22747 tree v4df_ftype_v4si
22748 = build_function_type_list (V4DF_type_node,
22751 tree v4df_ftype_v4sf
22752 = build_function_type_list (V4DF_type_node,
22755 tree v4sf_ftype_v4df
22756 = build_function_type_list (V4SF_type_node,
22759 tree v8sf_ftype_v8sf_v8sf
22760 = build_function_type_list (V8SF_type_node,
22761 V8SF_type_node, V8SF_type_node,
22763 tree v4df_ftype_v4df_v4df
22764 = build_function_type_list (V4DF_type_node,
22765 V4DF_type_node, V4DF_type_node,
22767 tree v8sf_ftype_v8sf_int
22768 = build_function_type_list (V8SF_type_node,
22769 V8SF_type_node, integer_type_node,
22771 tree v4si_ftype_v8si_int
22772 = build_function_type_list (V4SI_type_node,
22773 V8SI_type_node, integer_type_node,
22775 tree v4df_ftype_v4df_int
22776 = build_function_type_list (V4DF_type_node,
22777 V4DF_type_node, integer_type_node,
22779 tree v4sf_ftype_v8sf_int
22780 = build_function_type_list (V4SF_type_node,
22781 V8SF_type_node, integer_type_node,
22783 tree v2df_ftype_v4df_int
22784 = build_function_type_list (V2DF_type_node,
22785 V4DF_type_node, integer_type_node,
22787 tree v8sf_ftype_v8sf_v8sf_int
22788 = build_function_type_list (V8SF_type_node,
22789 V8SF_type_node, V8SF_type_node,
22792 tree v8sf_ftype_v8sf_v8sf_v8sf
22793 = build_function_type_list (V8SF_type_node,
22794 V8SF_type_node, V8SF_type_node,
22797 tree v4df_ftype_v4df_v4df_v4df
22798 = build_function_type_list (V4DF_type_node,
22799 V4DF_type_node, V4DF_type_node,
22802 tree v8si_ftype_v8si_v8si_int
22803 = build_function_type_list (V8SI_type_node,
22804 V8SI_type_node, V8SI_type_node,
22807 tree v4df_ftype_v4df_v4df_int
22808 = build_function_type_list (V4DF_type_node,
22809 V4DF_type_node, V4DF_type_node,
22812 tree v8sf_ftype_pcfloat
22813 = build_function_type_list (V8SF_type_node,
22816 tree v4df_ftype_pcdouble
22817 = build_function_type_list (V4DF_type_node,
22818 pcdouble_type_node,
22820 tree pcv4sf_type_node
22821 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22822 tree pcv2df_type_node
22823 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22824 tree v8sf_ftype_pcv4sf
22825 = build_function_type_list (V8SF_type_node,
22828 tree v4df_ftype_pcv2df
22829 = build_function_type_list (V4DF_type_node,
22832 tree v32qi_ftype_pcchar
22833 = build_function_type_list (V32QI_type_node,
22836 tree void_ftype_pchar_v32qi
22837 = build_function_type_list (void_type_node,
22838 pchar_type_node, V32QI_type_node,
22840 tree v8si_ftype_v8si_v4si_int
22841 = build_function_type_list (V8SI_type_node,
22842 V8SI_type_node, V4SI_type_node,
22845 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22846 tree void_ftype_pv4di_v4di
22847 = build_function_type_list (void_type_node,
22848 pv4di_type_node, V4DI_type_node,
22850 tree v8sf_ftype_v8sf_v4sf_int
22851 = build_function_type_list (V8SF_type_node,
22852 V8SF_type_node, V4SF_type_node,
22855 tree v4df_ftype_v4df_v2df_int
22856 = build_function_type_list (V4DF_type_node,
22857 V4DF_type_node, V2DF_type_node,
22860 tree void_ftype_pfloat_v8sf
22861 = build_function_type_list (void_type_node,
22862 pfloat_type_node, V8SF_type_node,
22864 tree void_ftype_pdouble_v4df
22865 = build_function_type_list (void_type_node,
22866 pdouble_type_node, V4DF_type_node,
22868 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22869 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22870 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22871 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22872 tree pcv8sf_type_node
22873 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22874 tree pcv4df_type_node
22875 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22876 tree v8sf_ftype_pcv8sf_v8sf
22877 = build_function_type_list (V8SF_type_node,
22878 pcv8sf_type_node, V8SF_type_node,
22880 tree v4df_ftype_pcv4df_v4df
22881 = build_function_type_list (V4DF_type_node,
22882 pcv4df_type_node, V4DF_type_node,
22884 tree v4sf_ftype_pcv4sf_v4sf
22885 = build_function_type_list (V4SF_type_node,
22886 pcv4sf_type_node, V4SF_type_node,
22888 tree v2df_ftype_pcv2df_v2df
22889 = build_function_type_list (V2DF_type_node,
22890 pcv2df_type_node, V2DF_type_node,
22892 tree void_ftype_pv8sf_v8sf_v8sf
22893 = build_function_type_list (void_type_node,
22894 pv8sf_type_node, V8SF_type_node,
22897 tree void_ftype_pv4df_v4df_v4df
22898 = build_function_type_list (void_type_node,
22899 pv4df_type_node, V4DF_type_node,
22902 tree void_ftype_pv4sf_v4sf_v4sf
22903 = build_function_type_list (void_type_node,
22904 pv4sf_type_node, V4SF_type_node,
22907 tree void_ftype_pv2df_v2df_v2df
22908 = build_function_type_list (void_type_node,
22909 pv2df_type_node, V2DF_type_node,
22912 tree v4df_ftype_v2df
22913 = build_function_type_list (V4DF_type_node,
22916 tree v8sf_ftype_v4sf
22917 = build_function_type_list (V8SF_type_node,
22920 tree v8si_ftype_v4si
22921 = build_function_type_list (V8SI_type_node,
22924 tree v2df_ftype_v4df
22925 = build_function_type_list (V2DF_type_node,
22928 tree v4sf_ftype_v8sf
22929 = build_function_type_list (V4SF_type_node,
22932 tree v4si_ftype_v8si
22933 = build_function_type_list (V4SI_type_node,
22936 tree int_ftype_v4df
22937 = build_function_type_list (integer_type_node,
22940 tree int_ftype_v8sf
22941 = build_function_type_list (integer_type_node,
22944 tree int_ftype_v8sf_v8sf
22945 = build_function_type_list (integer_type_node,
22946 V8SF_type_node, V8SF_type_node,
22948 tree int_ftype_v4di_v4di
22949 = build_function_type_list (integer_type_node,
22950 V4DI_type_node, V4DI_type_node,
22952 tree int_ftype_v4df_v4df
22953 = build_function_type_list (integer_type_node,
22954 V4DF_type_node, V4DF_type_node,
22956 tree v8sf_ftype_v8sf_v8si
22957 = build_function_type_list (V8SF_type_node,
22958 V8SF_type_node, V8SI_type_node,
22960 tree v4df_ftype_v4df_v4di
22961 = build_function_type_list (V4DF_type_node,
22962 V4DF_type_node, V4DI_type_node,
22964 tree v4sf_ftype_v4sf_v4si
22965 = build_function_type_list (V4SF_type_node,
22966 V4SF_type_node, V4SI_type_node, NULL_TREE);
22967 tree v2df_ftype_v2df_v2di
22968 = build_function_type_list (V2DF_type_node,
22969 V2DF_type_node, V2DI_type_node, NULL_TREE);
22973 /* Add all special builtins with variable number of operands. */
22974 for (i = 0, d = bdesc_special_args;
22975 i < ARRAY_SIZE (bdesc_special_args);
22983 switch ((enum ix86_special_builtin_type) d->flag)
22985 case VOID_FTYPE_VOID:
22986 type = void_ftype_void;
22988 case V32QI_FTYPE_PCCHAR:
22989 type = v32qi_ftype_pcchar;
22991 case V16QI_FTYPE_PCCHAR:
22992 type = v16qi_ftype_pcchar;
22994 case V8SF_FTYPE_PCV4SF:
22995 type = v8sf_ftype_pcv4sf;
22997 case V8SF_FTYPE_PCFLOAT:
22998 type = v8sf_ftype_pcfloat;
23000 case V4DF_FTYPE_PCV2DF:
23001 type = v4df_ftype_pcv2df;
23003 case V4DF_FTYPE_PCDOUBLE:
23004 type = v4df_ftype_pcdouble;
23006 case V4SF_FTYPE_PCFLOAT:
23007 type = v4sf_ftype_pcfloat;
23009 case V2DI_FTYPE_PV2DI:
23010 type = v2di_ftype_pv2di;
23012 case V2DF_FTYPE_PCDOUBLE:
23013 type = v2df_ftype_pcdouble;
23015 case V8SF_FTYPE_PCV8SF_V8SF:
23016 type = v8sf_ftype_pcv8sf_v8sf;
23018 case V4DF_FTYPE_PCV4DF_V4DF:
23019 type = v4df_ftype_pcv4df_v4df;
23021 case V4SF_FTYPE_V4SF_PCV2SF:
23022 type = v4sf_ftype_v4sf_pcv2sf;
23024 case V4SF_FTYPE_PCV4SF_V4SF:
23025 type = v4sf_ftype_pcv4sf_v4sf;
23027 case V2DF_FTYPE_V2DF_PCDOUBLE:
23028 type = v2df_ftype_v2df_pcdouble;
23030 case V2DF_FTYPE_PCV2DF_V2DF:
23031 type = v2df_ftype_pcv2df_v2df;
23033 case VOID_FTYPE_PV2SF_V4SF:
23034 type = void_ftype_pv2sf_v4sf;
23036 case VOID_FTYPE_PV4DI_V4DI:
23037 type = void_ftype_pv4di_v4di;
23039 case VOID_FTYPE_PV2DI_V2DI:
23040 type = void_ftype_pv2di_v2di;
23042 case VOID_FTYPE_PCHAR_V32QI:
23043 type = void_ftype_pchar_v32qi;
23045 case VOID_FTYPE_PCHAR_V16QI:
23046 type = void_ftype_pchar_v16qi;
23048 case VOID_FTYPE_PFLOAT_V8SF:
23049 type = void_ftype_pfloat_v8sf;
23051 case VOID_FTYPE_PFLOAT_V4SF:
23052 type = void_ftype_pfloat_v4sf;
23054 case VOID_FTYPE_PDOUBLE_V4DF:
23055 type = void_ftype_pdouble_v4df;
23057 case VOID_FTYPE_PDOUBLE_V2DF:
23058 type = void_ftype_pdouble_v2df;
23060 case VOID_FTYPE_PDI_DI:
23061 type = void_ftype_pdi_di;
23063 case VOID_FTYPE_PINT_INT:
23064 type = void_ftype_pint_int;
23066 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23067 type = void_ftype_pv8sf_v8sf_v8sf;
23069 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23070 type = void_ftype_pv4df_v4df_v4df;
23072 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23073 type = void_ftype_pv4sf_v4sf_v4sf;
23075 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23076 type = void_ftype_pv2df_v2df_v2df;
23079 gcc_unreachable ();
23082 def_builtin (d->mask, d->name, type, d->code);
23085 /* Add all builtins with variable number of operands. */
23086 for (i = 0, d = bdesc_args;
23087 i < ARRAY_SIZE (bdesc_args);
23095 switch ((enum ix86_builtin_type) d->flag)
23097 case FLOAT_FTYPE_FLOAT:
23098 type = float_ftype_float;
23100 case INT_FTYPE_V8SF_V8SF_PTEST:
23101 type = int_ftype_v8sf_v8sf;
23103 case INT_FTYPE_V4DI_V4DI_PTEST:
23104 type = int_ftype_v4di_v4di;
23106 case INT_FTYPE_V4DF_V4DF_PTEST:
23107 type = int_ftype_v4df_v4df;
23109 case INT_FTYPE_V4SF_V4SF_PTEST:
23110 type = int_ftype_v4sf_v4sf;
23112 case INT_FTYPE_V2DI_V2DI_PTEST:
23113 type = int_ftype_v2di_v2di;
23115 case INT_FTYPE_V2DF_V2DF_PTEST:
23116 type = int_ftype_v2df_v2df;
23118 case INT64_FTYPE_V4SF:
23119 type = int64_ftype_v4sf;
23121 case INT64_FTYPE_V2DF:
23122 type = int64_ftype_v2df;
23124 case INT_FTYPE_V16QI:
23125 type = int_ftype_v16qi;
23127 case INT_FTYPE_V8QI:
23128 type = int_ftype_v8qi;
23130 case INT_FTYPE_V8SF:
23131 type = int_ftype_v8sf;
23133 case INT_FTYPE_V4DF:
23134 type = int_ftype_v4df;
23136 case INT_FTYPE_V4SF:
23137 type = int_ftype_v4sf;
23139 case INT_FTYPE_V2DF:
23140 type = int_ftype_v2df;
23142 case V16QI_FTYPE_V16QI:
23143 type = v16qi_ftype_v16qi;
23145 case V8SI_FTYPE_V8SF:
23146 type = v8si_ftype_v8sf;
23148 case V8SI_FTYPE_V4SI:
23149 type = v8si_ftype_v4si;
23151 case V8HI_FTYPE_V8HI:
23152 type = v8hi_ftype_v8hi;
23154 case V8HI_FTYPE_V16QI:
23155 type = v8hi_ftype_v16qi;
23157 case V8QI_FTYPE_V8QI:
23158 type = v8qi_ftype_v8qi;
23160 case V8SF_FTYPE_V8SF:
23161 type = v8sf_ftype_v8sf;
23163 case V8SF_FTYPE_V8SI:
23164 type = v8sf_ftype_v8si;
23166 case V8SF_FTYPE_V4SF:
23167 type = v8sf_ftype_v4sf;
23169 case V4SI_FTYPE_V4DF:
23170 type = v4si_ftype_v4df;
23172 case V4SI_FTYPE_V4SI:
23173 type = v4si_ftype_v4si;
23175 case V4SI_FTYPE_V16QI:
23176 type = v4si_ftype_v16qi;
23178 case V4SI_FTYPE_V8SI:
23179 type = v4si_ftype_v8si;
23181 case V4SI_FTYPE_V8HI:
23182 type = v4si_ftype_v8hi;
23184 case V4SI_FTYPE_V4SF:
23185 type = v4si_ftype_v4sf;
23187 case V4SI_FTYPE_V2DF:
23188 type = v4si_ftype_v2df;
23190 case V4HI_FTYPE_V4HI:
23191 type = v4hi_ftype_v4hi;
23193 case V4DF_FTYPE_V4DF:
23194 type = v4df_ftype_v4df;
23196 case V4DF_FTYPE_V4SI:
23197 type = v4df_ftype_v4si;
23199 case V4DF_FTYPE_V4SF:
23200 type = v4df_ftype_v4sf;
23202 case V4DF_FTYPE_V2DF:
23203 type = v4df_ftype_v2df;
23205 case V4SF_FTYPE_V4SF:
23206 case V4SF_FTYPE_V4SF_VEC_MERGE:
23207 type = v4sf_ftype_v4sf;
23209 case V4SF_FTYPE_V8SF:
23210 type = v4sf_ftype_v8sf;
23212 case V4SF_FTYPE_V4SI:
23213 type = v4sf_ftype_v4si;
23215 case V4SF_FTYPE_V4DF:
23216 type = v4sf_ftype_v4df;
23218 case V4SF_FTYPE_V2DF:
23219 type = v4sf_ftype_v2df;
23221 case V2DI_FTYPE_V2DI:
23222 type = v2di_ftype_v2di;
23224 case V2DI_FTYPE_V16QI:
23225 type = v2di_ftype_v16qi;
23227 case V2DI_FTYPE_V8HI:
23228 type = v2di_ftype_v8hi;
23230 case V2DI_FTYPE_V4SI:
23231 type = v2di_ftype_v4si;
23233 case V2SI_FTYPE_V2SI:
23234 type = v2si_ftype_v2si;
23236 case V2SI_FTYPE_V4SF:
23237 type = v2si_ftype_v4sf;
23239 case V2SI_FTYPE_V2DF:
23240 type = v2si_ftype_v2df;
23242 case V2SI_FTYPE_V2SF:
23243 type = v2si_ftype_v2sf;
23245 case V2DF_FTYPE_V4DF:
23246 type = v2df_ftype_v4df;
23248 case V2DF_FTYPE_V4SF:
23249 type = v2df_ftype_v4sf;
23251 case V2DF_FTYPE_V2DF:
23252 case V2DF_FTYPE_V2DF_VEC_MERGE:
23253 type = v2df_ftype_v2df;
23255 case V2DF_FTYPE_V2SI:
23256 type = v2df_ftype_v2si;
23258 case V2DF_FTYPE_V4SI:
23259 type = v2df_ftype_v4si;
23261 case V2SF_FTYPE_V2SF:
23262 type = v2sf_ftype_v2sf;
23264 case V2SF_FTYPE_V2SI:
23265 type = v2sf_ftype_v2si;
23267 case V16QI_FTYPE_V16QI_V16QI:
23268 type = v16qi_ftype_v16qi_v16qi;
23270 case V16QI_FTYPE_V8HI_V8HI:
23271 type = v16qi_ftype_v8hi_v8hi;
23273 case V8QI_FTYPE_V8QI_V8QI:
23274 type = v8qi_ftype_v8qi_v8qi;
23276 case V8QI_FTYPE_V4HI_V4HI:
23277 type = v8qi_ftype_v4hi_v4hi;
23279 case V8HI_FTYPE_V8HI_V8HI:
23280 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23281 type = v8hi_ftype_v8hi_v8hi;
23283 case V8HI_FTYPE_V16QI_V16QI:
23284 type = v8hi_ftype_v16qi_v16qi;
23286 case V8HI_FTYPE_V4SI_V4SI:
23287 type = v8hi_ftype_v4si_v4si;
23289 case V8HI_FTYPE_V8HI_SI_COUNT:
23290 type = v8hi_ftype_v8hi_int;
23292 case V8SF_FTYPE_V8SF_V8SF:
23293 type = v8sf_ftype_v8sf_v8sf;
23295 case V8SF_FTYPE_V8SF_V8SI:
23296 type = v8sf_ftype_v8sf_v8si;
23298 case V4SI_FTYPE_V4SI_V4SI:
23299 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23300 type = v4si_ftype_v4si_v4si;
23302 case V4SI_FTYPE_V8HI_V8HI:
23303 type = v4si_ftype_v8hi_v8hi;
23305 case V4SI_FTYPE_V4SF_V4SF:
23306 type = v4si_ftype_v4sf_v4sf;
23308 case V4SI_FTYPE_V2DF_V2DF:
23309 type = v4si_ftype_v2df_v2df;
23311 case V4SI_FTYPE_V4SI_SI_COUNT:
23312 type = v4si_ftype_v4si_int;
23314 case V4HI_FTYPE_V4HI_V4HI:
23315 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23316 type = v4hi_ftype_v4hi_v4hi;
23318 case V4HI_FTYPE_V8QI_V8QI:
23319 type = v4hi_ftype_v8qi_v8qi;
23321 case V4HI_FTYPE_V2SI_V2SI:
23322 type = v4hi_ftype_v2si_v2si;
23324 case V4HI_FTYPE_V4HI_SI_COUNT:
23325 type = v4hi_ftype_v4hi_int;
23327 case V4DF_FTYPE_V4DF_V4DF:
23328 type = v4df_ftype_v4df_v4df;
23330 case V4DF_FTYPE_V4DF_V4DI:
23331 type = v4df_ftype_v4df_v4di;
23333 case V4SF_FTYPE_V4SF_V4SF:
23334 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23335 type = v4sf_ftype_v4sf_v4sf;
23337 case V4SF_FTYPE_V4SF_V4SI:
23338 type = v4sf_ftype_v4sf_v4si;
23340 case V4SF_FTYPE_V4SF_V2SI:
23341 type = v4sf_ftype_v4sf_v2si;
23343 case V4SF_FTYPE_V4SF_V2DF:
23344 type = v4sf_ftype_v4sf_v2df;
23346 case V4SF_FTYPE_V4SF_DI:
23347 type = v4sf_ftype_v4sf_int64;
23349 case V4SF_FTYPE_V4SF_SI:
23350 type = v4sf_ftype_v4sf_int;
23352 case V2DI_FTYPE_V2DI_V2DI:
23353 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23354 type = v2di_ftype_v2di_v2di;
23356 case V2DI_FTYPE_V16QI_V16QI:
23357 type = v2di_ftype_v16qi_v16qi;
23359 case V2DI_FTYPE_V4SI_V4SI:
23360 type = v2di_ftype_v4si_v4si;
23362 case V2DI_FTYPE_V2DI_V16QI:
23363 type = v2di_ftype_v2di_v16qi;
23365 case V2DI_FTYPE_V2DF_V2DF:
23366 type = v2di_ftype_v2df_v2df;
23368 case V2DI_FTYPE_V2DI_SI_COUNT:
23369 type = v2di_ftype_v2di_int;
23371 case V2SI_FTYPE_V2SI_V2SI:
23372 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23373 type = v2si_ftype_v2si_v2si;
23375 case V2SI_FTYPE_V4HI_V4HI:
23376 type = v2si_ftype_v4hi_v4hi;
23378 case V2SI_FTYPE_V2SF_V2SF:
23379 type = v2si_ftype_v2sf_v2sf;
23381 case V2SI_FTYPE_V2SI_SI_COUNT:
23382 type = v2si_ftype_v2si_int;
23384 case V2DF_FTYPE_V2DF_V2DF:
23385 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23386 type = v2df_ftype_v2df_v2df;
23388 case V2DF_FTYPE_V2DF_V4SF:
23389 type = v2df_ftype_v2df_v4sf;
23391 case V2DF_FTYPE_V2DF_V2DI:
23392 type = v2df_ftype_v2df_v2di;
23394 case V2DF_FTYPE_V2DF_DI:
23395 type = v2df_ftype_v2df_int64;
23397 case V2DF_FTYPE_V2DF_SI:
23398 type = v2df_ftype_v2df_int;
23400 case V2SF_FTYPE_V2SF_V2SF:
23401 type = v2sf_ftype_v2sf_v2sf;
23403 case V1DI_FTYPE_V1DI_V1DI:
23404 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23405 type = v1di_ftype_v1di_v1di;
23407 case V1DI_FTYPE_V8QI_V8QI:
23408 type = v1di_ftype_v8qi_v8qi;
23410 case V1DI_FTYPE_V2SI_V2SI:
23411 type = v1di_ftype_v2si_v2si;
23413 case V1DI_FTYPE_V1DI_SI_COUNT:
23414 type = v1di_ftype_v1di_int;
23416 case UINT64_FTYPE_UINT64_UINT64:
23417 type = uint64_ftype_uint64_uint64;
23419 case UINT_FTYPE_UINT_UINT:
23420 type = unsigned_ftype_unsigned_unsigned;
23422 case UINT_FTYPE_UINT_USHORT:
23423 type = unsigned_ftype_unsigned_ushort;
23425 case UINT_FTYPE_UINT_UCHAR:
23426 type = unsigned_ftype_unsigned_uchar;
23428 case V8HI_FTYPE_V8HI_INT:
23429 type = v8hi_ftype_v8hi_int;
23431 case V8SF_FTYPE_V8SF_INT:
23432 type = v8sf_ftype_v8sf_int;
23434 case V4SI_FTYPE_V4SI_INT:
23435 type = v4si_ftype_v4si_int;
23437 case V4SI_FTYPE_V8SI_INT:
23438 type = v4si_ftype_v8si_int;
23440 case V4HI_FTYPE_V4HI_INT:
23441 type = v4hi_ftype_v4hi_int;
23443 case V4DF_FTYPE_V4DF_INT:
23444 type = v4df_ftype_v4df_int;
23446 case V4SF_FTYPE_V4SF_INT:
23447 type = v4sf_ftype_v4sf_int;
23449 case V4SF_FTYPE_V8SF_INT:
23450 type = v4sf_ftype_v8sf_int;
23452 case V2DI_FTYPE_V2DI_INT:
23453 case V2DI2TI_FTYPE_V2DI_INT:
23454 type = v2di_ftype_v2di_int;
23456 case V2DF_FTYPE_V2DF_INT:
23457 type = v2df_ftype_v2df_int;
23459 case V2DF_FTYPE_V4DF_INT:
23460 type = v2df_ftype_v4df_int;
23462 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23463 type = v16qi_ftype_v16qi_v16qi_v16qi;
23465 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23466 type = v8sf_ftype_v8sf_v8sf_v8sf;
23468 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23469 type = v4df_ftype_v4df_v4df_v4df;
23471 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23472 type = v4sf_ftype_v4sf_v4sf_v4sf;
23474 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23475 type = v2df_ftype_v2df_v2df_v2df;
23477 case V16QI_FTYPE_V16QI_V16QI_INT:
23478 type = v16qi_ftype_v16qi_v16qi_int;
23480 case V8SI_FTYPE_V8SI_V8SI_INT:
23481 type = v8si_ftype_v8si_v8si_int;
23483 case V8SI_FTYPE_V8SI_V4SI_INT:
23484 type = v8si_ftype_v8si_v4si_int;
23486 case V8HI_FTYPE_V8HI_V8HI_INT:
23487 type = v8hi_ftype_v8hi_v8hi_int;
23489 case V8SF_FTYPE_V8SF_V8SF_INT:
23490 type = v8sf_ftype_v8sf_v8sf_int;
23492 case V8SF_FTYPE_V8SF_V4SF_INT:
23493 type = v8sf_ftype_v8sf_v4sf_int;
23495 case V4SI_FTYPE_V4SI_V4SI_INT:
23496 type = v4si_ftype_v4si_v4si_int;
23498 case V4DF_FTYPE_V4DF_V4DF_INT:
23499 type = v4df_ftype_v4df_v4df_int;
23501 case V4DF_FTYPE_V4DF_V2DF_INT:
23502 type = v4df_ftype_v4df_v2df_int;
23504 case V4SF_FTYPE_V4SF_V4SF_INT:
23505 type = v4sf_ftype_v4sf_v4sf_int;
23507 case V2DI_FTYPE_V2DI_V2DI_INT:
23508 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23509 type = v2di_ftype_v2di_v2di_int;
23511 case V2DF_FTYPE_V2DF_V2DF_INT:
23512 type = v2df_ftype_v2df_v2df_int;
23514 case V2DI_FTYPE_V2DI_UINT_UINT:
23515 type = v2di_ftype_v2di_unsigned_unsigned;
23517 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23518 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23520 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23521 type = v1di_ftype_v1di_v1di_int;
23524 gcc_unreachable ();
23527 def_builtin_const (d->mask, d->name, type, d->code);
23530 /* pcmpestr[im] insns. */
23531 for (i = 0, d = bdesc_pcmpestr;
23532 i < ARRAY_SIZE (bdesc_pcmpestr);
23535 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23536 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23538 ftype = int_ftype_v16qi_int_v16qi_int_int;
23539 def_builtin_const (d->mask, d->name, ftype, d->code);
23542 /* pcmpistr[im] insns. */
23543 for (i = 0, d = bdesc_pcmpistr;
23544 i < ARRAY_SIZE (bdesc_pcmpistr);
23547 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23548 ftype = v16qi_ftype_v16qi_v16qi_int;
23550 ftype = int_ftype_v16qi_v16qi_int;
23551 def_builtin_const (d->mask, d->name, ftype, d->code);
23554 /* comi/ucomi insns. */
23555 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23556 if (d->mask == OPTION_MASK_ISA_SSE2)
23557 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23559 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23562 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23563 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23565 /* SSE or 3DNow!A */
23566 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23569 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23571 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23572 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23575 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23576 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23579 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23580 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23581 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23582 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23583 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23584 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23587 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23590 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23591 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23593 /* Access to the vec_init patterns. */
23594 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23595 integer_type_node, NULL_TREE);
23596 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23598 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23599 short_integer_type_node,
23600 short_integer_type_node,
23601 short_integer_type_node, NULL_TREE);
23602 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23604 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23605 char_type_node, char_type_node,
23606 char_type_node, char_type_node,
23607 char_type_node, char_type_node,
23608 char_type_node, NULL_TREE);
23609 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23611 /* Access to the vec_extract patterns. */
23612 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23613 integer_type_node, NULL_TREE);
23614 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23616 ftype = build_function_type_list (long_long_integer_type_node,
23617 V2DI_type_node, integer_type_node,
23619 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23621 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23622 integer_type_node, NULL_TREE);
23623 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23625 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23626 integer_type_node, NULL_TREE);
23627 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23629 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23630 integer_type_node, NULL_TREE);
23631 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23633 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23634 integer_type_node, NULL_TREE);
23635 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23637 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23638 integer_type_node, NULL_TREE);
23639 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23641 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23642 integer_type_node, NULL_TREE);
23643 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23645 /* Access to the vec_set patterns. */
23646 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23648 integer_type_node, NULL_TREE);
23649 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23651 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23653 integer_type_node, NULL_TREE);
23654 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23656 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23658 integer_type_node, NULL_TREE);
23659 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23661 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23663 integer_type_node, NULL_TREE);
23664 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23666 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23668 integer_type_node, NULL_TREE);
23669 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23671 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23673 integer_type_node, NULL_TREE);
23674 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23676 /* Add SSE5 multi-arg argument instructions */
23677 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23679 tree mtype = NULL_TREE;
23684 switch ((enum multi_arg_type)d->flag)
23686 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23687 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23688 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23689 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23690 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23691 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23692 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23693 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23694 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23695 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23696 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23697 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23698 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23699 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23700 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23701 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23702 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23703 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23704 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23705 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23706 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23707 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23708 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23709 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23710 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23711 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23712 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23713 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23714 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23715 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23716 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23717 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23718 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23719 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23720 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23721 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23722 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23723 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23724 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23725 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23726 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23727 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23728 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23729 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23730 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23731 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23732 case MULTI_ARG_UNKNOWN:
23734 gcc_unreachable ();
23738 def_builtin_const (d->mask, d->name, mtype, d->code);
23742 /* Internal method for ix86_init_builtins.  Register the
   __builtin_ms_va_{start,end,copy} and __builtin_sysv_va_{start,end,copy}
   builtins so both ABI-specific va_list flavors are usable regardless of
   the function's own calling convention.  Each builtin carries an "ms_abi"
   or "sysv_abi" attribute so it is expanded with the matching ABI.
   NOTE(review): several original source lines are elided in this view
   (e.g. the sysv_va_ref assignment's left-hand side).  */
23745 ix86_init_builtins_va_builtins_abi (void)
23747 tree ms_va_ref, sysv_va_ref;
23748 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23749 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23750 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23751 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23755 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23756 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23757 ms_va_ref = build_reference_type (ms_va_list_type_node);
/* The sysv va_list is an array type, so take a pointer to its element
   type rather than a reference to the array itself.  */
23759 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23762 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23763 fnvoid_va_start_ms =
/* va_start is varargs by nature; build its type accordingly.  */
23764 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23765 fnvoid_va_end_sysv =
23766 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23767 fnvoid_va_start_sysv =
23768 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23770 fnvoid_va_copy_ms =
23771 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23773 fnvoid_va_copy_sysv =
23774 build_function_type_list (void_type_node, sysv_va_ref,
23775 sysv_va_ref, NULL_TREE);
/* Register all six builtins under the standard BUILT_IN_VA_* codes,
   tagging each with the ABI attribute list built above.  */
23777 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23778 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23779 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23780 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23781 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23782 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23783 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23784 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23785 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23786 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23787 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23788 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 back end: registers the
   __float80/__float128 types, the TFmode math builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), then the
   MMX/SSE builtins and the per-ABI va_list builtins.
   NOTE(review): interior lines (declarations of `ftype'/`decl', else
   branches, string arguments) are elided in this view.  */
23792 ix86_init_builtins (void)
23794 tree float128_type_node = make_node (REAL_TYPE);
/* If long double is already the 80-bit XFmode type, just alias it as
   __float80 instead of building a fresh type.  */
23797 /* The __float80 type. */
23798 if (TYPE_MODE (long_double_type_node) == XFmode)
23799 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23803 /* The __float80 type. */
23804 tree float80_type_node = make_node (REAL_TYPE);
23806 TYPE_PRECISION (float80_type_node) = 80;
23807 layout_type (float80_type_node);
23808 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23812 /* The __float128 type. */
23813 TYPE_PRECISION (float128_type_node) = 128;
23814 layout_type (float128_type_node);
23815 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23818 /* TFmode support builtins. */
23819 ftype = build_function_type (float128_type_node, void_list_node);
23820 decl = add_builtin_function ("__builtin_infq", ftype,
23821 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23823 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23825 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23826 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23828 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23830 /* We will expand them to normal call if SSE2 isn't available since
23831 they are used by libgcc. */
23832 ftype = build_function_type_list (float128_type_node,
23833 float128_type_node,
23835 decl = add_builtin_function ("__builtin_fabsq", ftype,
23836 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23837 "__fabstf2", NULL_TREE);
23838 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Mark as pure so calls can be CSE'd/removed when the result is unused.  */
23839 TREE_READONLY (decl) = 1;
23841 ftype = build_function_type_list (float128_type_node,
23842 float128_type_node,
23843 float128_type_node,
23845 decl = add_builtin_function ("__builtin_copysignq", ftype,
23846 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23847 "__copysigntf3", NULL_TREE);
23848 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23849 TREE_READONLY (decl) = 1;
23851 ix86_init_mmx_sse_builtins ();
23853 ix86_init_builtins_va_builtins_abi ();
23856 /* Errors in the source file can cause expand_expr to return const0_rtx
23857 where we expect a vector. To avoid crashing, use one of the vector
23858 clear instructions. */
/* Replace a scalar const0_rtx with the all-zero vector constant of MODE
   so downstream predicates and insn generators see a valid vector.
   NOTE(review): the return statement is elided in this view; the
   function returns X (possibly replaced).  */
23860 safe_vector_operand (rtx x, enum machine_mode mode)
23862 if (x == const0_rtx)
23863 x = CONST0_RTX (mode);
23867 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin: force both operands through the insn's
   operand predicates, allocate (or reuse) TARGET in the result mode,
   and emit the pattern.  Returns the result rtx.
   NOTE(review): the tail (pat NULL check, emit_insn, return) is elided
   in this view.  */
23870 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23873 tree arg0 = CALL_EXPR_ARG (exp, 0);
23874 tree arg1 = CALL_EXPR_ARG (exp, 1);
23875 rtx op0 = expand_normal (arg0);
23876 rtx op1 = expand_normal (arg1);
23877 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23878 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23879 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx placeholders produced after source errors.  */
23881 if (VECTOR_MODE_P (mode0))
23882 op0 = safe_vector_operand (op0, mode0);
23883 if (VECTOR_MODE_P (mode1))
23884 op1 = safe_vector_operand (op1, mode1);
/* When optimizing, always use a fresh pseudo so the register allocator
   has freedom; otherwise reuse TARGET only if mode and predicate fit.  */
23886 if (optimize || !target
23887 || GET_MODE (target) != tmode
23888 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23889 target = gen_reg_rtx (tmode);
/* An SImode count for a TImode shift: load it into the low element of a
   V4SI register and view that as TImode.  */
23891 if (GET_MODE (op1) == SImode && mode1 == TImode)
23893 rtx x = gen_reg_rtx (V4SImode);
23894 emit_insn (gen_sse2_loadd (x, op1));
23895 op1 = gen_lowpart (TImode, x);
23898 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23899 op0 = copy_to_mode_reg (mode0, op0);
23900 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23901 op1 = copy_to_mode_reg (mode1, op1);
23903 pat = GEN_FCN (icode) (target, op0, op1);
23912 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand a multi-argument (SSE5/XOP-style) builtin.  M_TYPE classifies the
   argument count and any special last-argument handling (immediate,
   comparison code via SUB_CODE, test-form); the switch below decodes it.
   NOTE(review): the lines setting `nargs' in each case group, parts of the
   operand loop, and the final emit/return are elided in this view.  */
23915 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23916 enum multi_arg_type m_type,
23917 enum insn_code sub_code)
23922 bool comparison_p = false;
23924 bool last_arg_constant = false;
23925 int num_memory = 0;
23928 enum machine_mode mode;
23931 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
23935 case MULTI_ARG_3_SF:
23936 case MULTI_ARG_3_DF:
23937 case MULTI_ARG_3_DI:
23938 case MULTI_ARG_3_SI:
23939 case MULTI_ARG_3_SI_DI:
23940 case MULTI_ARG_3_HI:
23941 case MULTI_ARG_3_HI_SI:
23942 case MULTI_ARG_3_QI:
23943 case MULTI_ARG_3_PERMPS:
23944 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
23948 case MULTI_ARG_2_SF:
23949 case MULTI_ARG_2_DF:
23950 case MULTI_ARG_2_DI:
23951 case MULTI_ARG_2_SI:
23952 case MULTI_ARG_2_HI:
23953 case MULTI_ARG_2_QI:
/* Two operands where the last must be an immediate.  */
23957 case MULTI_ARG_2_DI_IMM:
23958 case MULTI_ARG_2_SI_IMM:
23959 case MULTI_ARG_2_HI_IMM:
23960 case MULTI_ARG_2_QI_IMM:
23962 last_arg_constant = true;
/* One-operand (unary / conversion) forms.  */
23965 case MULTI_ARG_1_SF:
23966 case MULTI_ARG_1_DF:
23967 case MULTI_ARG_1_DI:
23968 case MULTI_ARG_1_SI:
23969 case MULTI_ARG_1_HI:
23970 case MULTI_ARG_1_QI:
23971 case MULTI_ARG_1_SI_DI:
23972 case MULTI_ARG_1_HI_DI:
23973 case MULTI_ARG_1_HI_SI:
23974 case MULTI_ARG_1_QI_DI:
23975 case MULTI_ARG_1_QI_SI:
23976 case MULTI_ARG_1_QI_HI:
23977 case MULTI_ARG_1_PH2PS:
23978 case MULTI_ARG_1_PS2PH:
/* Comparison forms: SUB_CODE supplies the rtx comparison code.  */
23982 case MULTI_ARG_2_SF_CMP:
23983 case MULTI_ARG_2_DF_CMP:
23984 case MULTI_ARG_2_DI_CMP:
23985 case MULTI_ARG_2_SI_CMP:
23986 case MULTI_ARG_2_HI_CMP:
23987 case MULTI_ARG_2_QI_CMP:
23989 comparison_p = true;
/* Test forms: comparison encoded as an immediate operand instead.  */
23992 case MULTI_ARG_2_SF_TF:
23993 case MULTI_ARG_2_DF_TF:
23994 case MULTI_ARG_2_DI_TF:
23995 case MULTI_ARG_2_SI_TF:
23996 case MULTI_ARG_2_HI_TF:
23997 case MULTI_ARG_2_QI_TF:
24002 case MULTI_ARG_UNKNOWN:
24004 gcc_unreachable ();
24007 if (optimize || !target
24008 || GET_MODE (target) != tmode
24009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24010 target = gen_reg_rtx (tmode);
24012 gcc_assert (nargs <= 4);
24014 for (i = 0; i < nargs; i++)
24016 tree arg = CALL_EXPR_ARG (exp, i);
24017 rtx op = expand_normal (arg);
/* For comparisons the insn has an extra (comparison) operand before
   the arguments, so shift the operand index by one.  */
24018 int adjust = (comparison_p) ? 1 : 0;
24019 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24021 if (last_arg_constant && i == nargs-1)
24023 if (GET_CODE (op) != CONST_INT)
24025 error ("last argument must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
24026 return gen_reg_rtx (tmode);
24031 if (VECTOR_MODE_P (mode))
24032 op = safe_vector_operand (op, mode);
24034 /* If we aren't optimizing, only allow one memory operand to be
24036 if (memory_operand (op, mode))
24039 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24042 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24044 op = force_reg (mode, op);
24048 args[i].mode = mode;
24054 pat = GEN_FCN (icode) (target, args[0].op);
24059 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24060 GEN_INT ((int)sub_code));
24061 else if (! comparison_p)
24062 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx and pass it as operand 1.  */
24065 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24069 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24074 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24078 gcc_unreachable ();
24088 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24089 insns with vec_merge. */
/* Expand a scalar unary op whose pattern also takes the source vector as a
   merge operand (op1 duplicates op0 so the untouched elements pass
   through).  NOTE(review): the `op1 = op0' line and the final
   emit/return are elided in this view.  */
24092 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24096 tree arg0 = CALL_EXPR_ARG (exp, 0);
24097 rtx op1, op0 = expand_normal (arg0);
24098 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24099 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24101 if (optimize || !target
24102 || GET_MODE (target) != tmode
24103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24104 target = gen_reg_rtx (tmode);
24106 if (VECTOR_MODE_P (mode0))
24107 op0 = safe_vector_operand (op0, mode0);
24109 if ((optimize && !register_operand (op0, mode0))
24110 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24111 op0 = copy_to_mode_reg (mode0, op0);
24114 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24115 op1 = copy_to_mode_reg (mode0, op1)
24117 pat = GEN_FCN (icode) (target, op0, op1);
24124 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE vector comparison builtin described by D.  When SWAP is
   set the operands are exchanged (through a fresh register) because the
   hardware only implements one orientation of the comparison.
   NOTE(review): the swap body's reassignment lines and the final
   emit/return are elided in this view.  */
24127 ix86_expand_sse_compare (const struct builtin_description *d,
24128 tree exp, rtx target, bool swap)
24131 tree arg0 = CALL_EXPR_ARG (exp, 0);
24132 tree arg1 = CALL_EXPR_ARG (exp, 1);
24133 rtx op0 = expand_normal (arg0);
24134 rtx op1 = expand_normal (arg1);
24136 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24137 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24138 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24139 enum rtx_code comparison = d->comparison;
24141 if (VECTOR_MODE_P (mode0))
24142 op0 = safe_vector_operand (op0, mode0);
24143 if (VECTOR_MODE_P (mode1))
24144 op1 = safe_vector_operand (op1, mode1);
24146 /* Swap operands if we have a comparison that isn't available in
24150 rtx tmp = gen_reg_rtx (mode1);
24151 emit_move_insn (tmp, op1);
24156 if (optimize || !target
24157 || GET_MODE (target) != tmode
24158 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24159 target = gen_reg_rtx (tmode);
24161 if ((optimize && !register_operand (op0, mode0))
24162 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24163 op0 = copy_to_mode_reg (mode0, op0);
24164 if ((optimize && !register_operand (op1, mode1))
24165 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24166 op1 = copy_to_mode_reg (mode1, op1);
/* The pattern's last operand is the comparison rtx itself.  */
24168 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24169 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24176 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a COMIS/UCOMIS builtin: emit the compare, then materialize the
   flag result as a 0/1 SImode value by setting the low QImode subreg from
   the condition code.  NOTE(review): the operand-swap body and the
   pat NULL check/emit are elided in this view.  */
24179 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24183 tree arg0 = CALL_EXPR_ARG (exp, 0);
24184 tree arg1 = CALL_EXPR_ARG (exp, 1);
24185 rtx op0 = expand_normal (arg0);
24186 rtx op1 = expand_normal (arg1);
24187 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24188 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24189 enum rtx_code comparison = d->comparison;
24191 if (VECTOR_MODE_P (mode0))
24192 op0 = safe_vector_operand (op0, mode0);
24193 if (VECTOR_MODE_P (mode1))
24194 op1 = safe_vector_operand (op1, mode1);
24196 /* Swap operands if we have a comparison that isn't available in
24198 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first, then write only the low byte via a
   STRICT_LOW_PART below, so the upper bits are well-defined.  */
24205 target = gen_reg_rtx (SImode);
24206 emit_move_insn (target, const0_rtx);
24207 target = gen_rtx_SUBREG (QImode, target, 0);
24209 if ((optimize && !register_operand (op0, mode0))
24210 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24211 op0 = copy_to_mode_reg (mode0, op0);
24212 if ((optimize && !register_operand (op1, mode1))
24213 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24214 op1 = copy_to_mode_reg (mode1, op1);
24216 pat = GEN_FCN (d->icode) (op0, op1);
24220 emit_insn (gen_rtx_SET (VOIDmode,
24221 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24222 gen_rtx_fmt_ee (comparison, QImode,
/* Return the underlying SImode pseudo, not the QImode subreg.  */
24226 return SUBREG_REG (target);
24229 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin: emit the test insn (which only sets
   flags), then convert the requested condition code into a 0/1 SImode
   result, mirroring ix86_expand_sse_comi.  NOTE(review): the pat NULL
   check/emit lines are elided in this view.  */
24232 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24236 tree arg0 = CALL_EXPR_ARG (exp, 0);
24237 tree arg1 = CALL_EXPR_ARG (exp, 1);
24238 rtx op0 = expand_normal (arg0);
24239 rtx op1 = expand_normal (arg1);
24240 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24241 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24242 enum rtx_code comparison = d->comparison;
24244 if (VECTOR_MODE_P (mode0))
24245 op0 = safe_vector_operand (op0, mode0);
24246 if (VECTOR_MODE_P (mode1))
24247 op1 = safe_vector_operand (op1, mode1);
/* Zero-extend trick: clear SImode result, set only the low byte.  */
24249 target = gen_reg_rtx (SImode);
24250 emit_move_insn (target, const0_rtx);
24251 target = gen_rtx_SUBREG (QImode, target, 0);
24253 if ((optimize && !register_operand (op0, mode0))
24254 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24255 op0 = copy_to_mode_reg (mode0, op0);
24256 if ((optimize && !register_operand (op1, mode1))
24257 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24258 op1 = copy_to_mode_reg (mode1, op1);
24260 pat = GEN_FCN (d->icode) (op0, op1);
24264 emit_insn (gen_rtx_SET (VOIDmode,
24265 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24266 gen_rtx_fmt_ee (comparison, QImode,
24270 return SUBREG_REG (target);
24273 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a PCMPESTRI/PCMPESTRM builtin (explicit-length string compare).
   The pattern has two outputs (index and mask); depending on which
   builtin variant is being expanded, one becomes TARGET and the other a
   scratch, or (for the flag-testing variants) both are scratches and the
   requested EFLAGS bit is extracted as a 0/1 value.
   NOTE(review): error-return and pat-emit lines are elided in this view.
   Also note the runtime message "a 8-bit immediate" should grammatically
   be "an 8-bit immediate" — left untouched here since this edit changes
   comments only.  */
24276 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24277 tree exp, rtx target)
24280 tree arg0 = CALL_EXPR_ARG (exp, 0);
24281 tree arg1 = CALL_EXPR_ARG (exp, 1);
24282 tree arg2 = CALL_EXPR_ARG (exp, 2);
24283 tree arg3 = CALL_EXPR_ARG (exp, 3);
24284 tree arg4 = CALL_EXPR_ARG (exp, 4);
24285 rtx scratch0, scratch1;
24286 rtx op0 = expand_normal (arg0);
24287 rtx op1 = expand_normal (arg1);
24288 rtx op2 = expand_normal (arg2);
24289 rtx op3 = expand_normal (arg3);
24290 rtx op4 = expand_normal (arg4);
24291 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24293 tmode0 = insn_data[d->icode].operand[0].mode;
24294 tmode1 = insn_data[d->icode].operand[1].mode;
24295 modev2 = insn_data[d->icode].operand[2].mode;
24296 modei3 = insn_data[d->icode].operand[3].mode;
24297 modev4 = insn_data[d->icode].operand[4].mode;
24298 modei5 = insn_data[d->icode].operand[5].mode;
24299 modeimm = insn_data[d->icode].operand[6].mode;
24301 if (VECTOR_MODE_P (modev2))
24302 op0 = safe_vector_operand (op0, modev2);
24303 if (VECTOR_MODE_P (modev4))
24304 op2 = safe_vector_operand (op2, modev4);
24306 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24307 op0 = copy_to_mode_reg (modev2, op0);
24308 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24309 op1 = copy_to_mode_reg (modei3, op1);
24310 if ((optimize && !register_operand (op2, modev4))
24311 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24312 op2 = copy_to_mode_reg (modev4, op2);
24313 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24314 op3 = copy_to_mode_reg (modei5, op3);
/* The mode/control operand must be a compile-time 8-bit immediate.  */
24316 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24318 error ("the fifth argument must be a 8-bit immediate");
/* ...I128 variant: the index output is the result, mask is scratch.  */
24322 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24324 if (optimize || !target
24325 || GET_MODE (target) != tmode0
24326 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24327 target = gen_reg_rtx (tmode0);
24329 scratch1 = gen_reg_rtx (tmode1);
24331 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* ...M128 variant: the mask output is the result, index is scratch.  */
24333 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24335 if (optimize || !target
24336 || GET_MODE (target) != tmode1
24337 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24338 target = gen_reg_rtx (tmode1);
24340 scratch0 = gen_reg_rtx (tmode0);
24342 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variants: both outputs are scratch; d->flag names the
   flags register from which the EQ bit is read below.  */
24346 gcc_assert (d->flag);
24348 scratch0 = gen_reg_rtx (tmode0);
24349 scratch1 = gen_reg_rtx (tmode1);
24351 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24361 target = gen_reg_rtx (SImode);
24362 emit_move_insn (target, const0_rtx);
24363 target = gen_rtx_SUBREG (QImode, target, 0);
24366 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24367 gen_rtx_fmt_ee (EQ, QImode,
24368 gen_rtx_REG ((enum machine_mode) d->flag,
24371 return SUBREG_REG (target);
24378 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a PCMPISTRI/PCMPISTRM builtin (implicit-length string compare).
   Structure parallels ix86_expand_sse_pcmpestr, minus the two explicit
   length operands.  NOTE(review): error-return and pat-emit lines are
   elided in this view; the message "a 8-bit immediate" has the same
   grammar defect noted for the estr variant.  */
24381 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24382 tree exp, rtx target)
24385 tree arg0 = CALL_EXPR_ARG (exp, 0);
24386 tree arg1 = CALL_EXPR_ARG (exp, 1);
24387 tree arg2 = CALL_EXPR_ARG (exp, 2);
24388 rtx scratch0, scratch1;
24389 rtx op0 = expand_normal (arg0);
24390 rtx op1 = expand_normal (arg1);
24391 rtx op2 = expand_normal (arg2);
24392 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24394 tmode0 = insn_data[d->icode].operand[0].mode;
24395 tmode1 = insn_data[d->icode].operand[1].mode;
24396 modev2 = insn_data[d->icode].operand[2].mode;
24397 modev3 = insn_data[d->icode].operand[3].mode;
24398 modeimm = insn_data[d->icode].operand[4].mode;
24400 if (VECTOR_MODE_P (modev2))
24401 op0 = safe_vector_operand (op0, modev2);
24402 if (VECTOR_MODE_P (modev3))
24403 op1 = safe_vector_operand (op1, modev3);
24405 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24406 op0 = copy_to_mode_reg (modev2, op0);
24407 if ((optimize && !register_operand (op1, modev3))
24408 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24409 op1 = copy_to_mode_reg (modev3, op1);
/* Control operand must be a compile-time 8-bit immediate.  */
24411 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24413 error ("the third argument must be a 8-bit immediate");
/* Index-result variant.  */
24417 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24419 if (optimize || !target
24420 || GET_MODE (target) != tmode0
24421 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24422 target = gen_reg_rtx (tmode0);
24424 scratch1 = gen_reg_rtx (tmode1);
24426 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* Mask-result variant.  */
24428 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24430 if (optimize || !target
24431 || GET_MODE (target) != tmode1
24432 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24433 target = gen_reg_rtx (tmode1);
24435 scratch0 = gen_reg_rtx (tmode0);
24437 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: extract the EFLAGS bit named by d->flag.  */
24441 gcc_assert (d->flag);
24443 scratch0 = gen_reg_rtx (tmode0);
24444 scratch1 = gen_reg_rtx (tmode1);
24446 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24456 target = gen_reg_rtx (SImode);
24457 emit_move_insn (target, const0_rtx);
24458 target = gen_rtx_SUBREG (QImode, target, 0);
24461 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24462 gen_rtx_fmt_ee (EQ, QImode,
24463 gen_rtx_REG ((enum machine_mode) d->flag,
24466 return SUBREG_REG (target);
24472 /* Subroutine of ix86_expand_builtin to take care of insns with
24473 variable number of operands. */
/* Generic expander driven by the builtin's function-type tag (d->flag):
   the big switch classifies the signature (arity, immediate operands,
   shift counts, comparisons, PTEST forms, subreg-result forms) and
   dispatches simple cases to specialized helpers; the remainder fall
   through to the operand loop and GEN_FCN emission below.
   NOTE(review): many interior lines (nargs assignments, rmode
   assignments, `break's, the final emit/return) are elided in this
   view.  */
24476 ix86_expand_args_builtin (const struct builtin_description *d,
24477 tree exp, rtx target)
24479 rtx pat, real_target;
24480 unsigned int i, nargs;
24481 unsigned int nargs_constant = 0;
24482 int num_memory = 0;
24486 enum machine_mode mode;
24488 bool last_arg_count = false;
24489 enum insn_code icode = d->icode;
24490 const struct insn_data *insn_p = &insn_data[icode];
24491 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode != VOIDmode means the result is produced in RMODE and viewed as
   TMODE through a subreg (the *2TI / *2DI cases below).  */
24492 enum machine_mode rmode = VOIDmode;
24494 enum rtx_code comparison = d->comparison;
24496 switch ((enum ix86_builtin_type) d->flag)
/* Flag-testing forms delegate entirely to the ptest helper.  */
24498 case INT_FTYPE_V8SF_V8SF_PTEST:
24499 case INT_FTYPE_V4DI_V4DI_PTEST:
24500 case INT_FTYPE_V4DF_V4DF_PTEST:
24501 case INT_FTYPE_V4SF_V4SF_PTEST:
24502 case INT_FTYPE_V2DI_V2DI_PTEST:
24503 case INT_FTYPE_V2DF_V2DF_PTEST:
24504 return ix86_expand_sse_ptest (d, exp, target);
/* Unary forms.  */
24505 case FLOAT128_FTYPE_FLOAT128:
24506 case FLOAT_FTYPE_FLOAT:
24507 case INT64_FTYPE_V4SF:
24508 case INT64_FTYPE_V2DF:
24509 case INT_FTYPE_V16QI:
24510 case INT_FTYPE_V8QI:
24511 case INT_FTYPE_V8SF:
24512 case INT_FTYPE_V4DF:
24513 case INT_FTYPE_V4SF:
24514 case INT_FTYPE_V2DF:
24515 case V16QI_FTYPE_V16QI:
24516 case V8SI_FTYPE_V8SF:
24517 case V8SI_FTYPE_V4SI:
24518 case V8HI_FTYPE_V8HI:
24519 case V8HI_FTYPE_V16QI:
24520 case V8QI_FTYPE_V8QI:
24521 case V8SF_FTYPE_V8SF:
24522 case V8SF_FTYPE_V8SI:
24523 case V8SF_FTYPE_V4SF:
24524 case V4SI_FTYPE_V4SI:
24525 case V4SI_FTYPE_V16QI:
24526 case V4SI_FTYPE_V4SF:
24527 case V4SI_FTYPE_V8SI:
24528 case V4SI_FTYPE_V8HI:
24529 case V4SI_FTYPE_V4DF:
24530 case V4SI_FTYPE_V2DF:
24531 case V4HI_FTYPE_V4HI:
24532 case V4DF_FTYPE_V4DF:
24533 case V4DF_FTYPE_V4SI:
24534 case V4DF_FTYPE_V4SF:
24535 case V4DF_FTYPE_V2DF:
24536 case V4SF_FTYPE_V4SF:
24537 case V4SF_FTYPE_V4SI:
24538 case V4SF_FTYPE_V8SF:
24539 case V4SF_FTYPE_V4DF:
24540 case V4SF_FTYPE_V2DF:
24541 case V2DI_FTYPE_V2DI:
24542 case V2DI_FTYPE_V16QI:
24543 case V2DI_FTYPE_V8HI:
24544 case V2DI_FTYPE_V4SI:
24545 case V2DF_FTYPE_V2DF:
24546 case V2DF_FTYPE_V4SI:
24547 case V2DF_FTYPE_V4DF:
24548 case V2DF_FTYPE_V4SF:
24549 case V2DF_FTYPE_V2SI:
24550 case V2SI_FTYPE_V2SI:
24551 case V2SI_FTYPE_V4SF:
24552 case V2SI_FTYPE_V2SF:
24553 case V2SI_FTYPE_V2DF:
24554 case V2SF_FTYPE_V2SF:
24555 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge delegate to a helper.  */
24558 case V4SF_FTYPE_V4SF_VEC_MERGE:
24559 case V2DF_FTYPE_V2DF_VEC_MERGE:
24560 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Plain binary forms; comparisons (comparison != UNKNOWN) fall through
   to the SSE compare path after the switch.  */
24561 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24562 case V16QI_FTYPE_V16QI_V16QI:
24563 case V16QI_FTYPE_V8HI_V8HI:
24564 case V8QI_FTYPE_V8QI_V8QI:
24565 case V8QI_FTYPE_V4HI_V4HI:
24566 case V8HI_FTYPE_V8HI_V8HI:
24567 case V8HI_FTYPE_V16QI_V16QI:
24568 case V8HI_FTYPE_V4SI_V4SI:
24569 case V8SF_FTYPE_V8SF_V8SF:
24570 case V8SF_FTYPE_V8SF_V8SI:
24571 case V4SI_FTYPE_V4SI_V4SI:
24572 case V4SI_FTYPE_V8HI_V8HI:
24573 case V4SI_FTYPE_V4SF_V4SF:
24574 case V4SI_FTYPE_V2DF_V2DF:
24575 case V4HI_FTYPE_V4HI_V4HI:
24576 case V4HI_FTYPE_V8QI_V8QI:
24577 case V4HI_FTYPE_V2SI_V2SI:
24578 case V4DF_FTYPE_V4DF_V4DF:
24579 case V4DF_FTYPE_V4DF_V4DI:
24580 case V4SF_FTYPE_V4SF_V4SF:
24581 case V4SF_FTYPE_V4SF_V4SI:
24582 case V4SF_FTYPE_V4SF_V2SI:
24583 case V4SF_FTYPE_V4SF_V2DF:
24584 case V4SF_FTYPE_V4SF_DI:
24585 case V4SF_FTYPE_V4SF_SI:
24586 case V2DI_FTYPE_V2DI_V2DI:
24587 case V2DI_FTYPE_V16QI_V16QI:
24588 case V2DI_FTYPE_V4SI_V4SI:
24589 case V2DI_FTYPE_V2DI_V16QI:
24590 case V2DI_FTYPE_V2DF_V2DF:
24591 case V2SI_FTYPE_V2SI_V2SI:
24592 case V2SI_FTYPE_V4HI_V4HI:
24593 case V2SI_FTYPE_V2SF_V2SF:
24594 case V2DF_FTYPE_V2DF_V2DF:
24595 case V2DF_FTYPE_V2DF_V4SF:
24596 case V2DF_FTYPE_V2DF_V2DI:
24597 case V2DF_FTYPE_V2DF_DI:
24598 case V2DF_FTYPE_V2DF_SI:
24599 case V2SF_FTYPE_V2SF_V2SF:
24600 case V1DI_FTYPE_V1DI_V1DI:
24601 case V1DI_FTYPE_V8QI_V8QI:
24602 case V1DI_FTYPE_V2SI_V2SI:
24603 if (comparison == UNKNOWN)
24604 return ix86_expand_binop_builtin (icode, exp, target);
/* Swapped-operand comparison forms.  */
24607 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24608 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24609 gcc_assert (comparison != UNKNOWN);
/* Shift forms: last argument is a count (register or immediate).  */
24613 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24614 case V8HI_FTYPE_V8HI_SI_COUNT:
24615 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24616 case V4SI_FTYPE_V4SI_SI_COUNT:
24617 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24618 case V4HI_FTYPE_V4HI_SI_COUNT:
24619 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24620 case V2DI_FTYPE_V2DI_SI_COUNT:
24621 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24622 case V2SI_FTYPE_V2SI_SI_COUNT:
24623 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24624 case V1DI_FTYPE_V1DI_SI_COUNT:
24626 last_arg_count = true;
24628 case UINT64_FTYPE_UINT64_UINT64:
24629 case UINT_FTYPE_UINT_UINT:
24630 case UINT_FTYPE_UINT_USHORT:
24631 case UINT_FTYPE_UINT_UCHAR:
/* Result produced in TImode, viewed as V2DI via a subreg.  */
24634 case V2DI2TI_FTYPE_V2DI_INT:
24637 nargs_constant = 1;
/* Two operands, last one an immediate.  */
24639 case V8HI_FTYPE_V8HI_INT:
24640 case V8SF_FTYPE_V8SF_INT:
24641 case V4SI_FTYPE_V4SI_INT:
24642 case V4SI_FTYPE_V8SI_INT:
24643 case V4HI_FTYPE_V4HI_INT:
24644 case V4DF_FTYPE_V4DF_INT:
24645 case V4SF_FTYPE_V4SF_INT:
24646 case V4SF_FTYPE_V8SF_INT:
24647 case V2DI_FTYPE_V2DI_INT:
24648 case V2DF_FTYPE_V2DF_INT:
24649 case V2DF_FTYPE_V4DF_INT:
24651 nargs_constant = 1;
/* Ternary forms (blend-style selects).  */
24653 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24654 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24655 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24656 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24657 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three operands, last one an immediate.  */
24660 case V16QI_FTYPE_V16QI_V16QI_INT:
24661 case V8HI_FTYPE_V8HI_V8HI_INT:
24662 case V8SI_FTYPE_V8SI_V8SI_INT:
24663 case V8SI_FTYPE_V8SI_V4SI_INT:
24664 case V8SF_FTYPE_V8SF_V8SF_INT:
24665 case V8SF_FTYPE_V8SF_V4SF_INT:
24666 case V4SI_FTYPE_V4SI_V4SI_INT:
24667 case V4DF_FTYPE_V4DF_V4DF_INT:
24668 case V4DF_FTYPE_V4DF_V2DF_INT:
24669 case V4SF_FTYPE_V4SF_V4SF_INT:
24670 case V2DI_FTYPE_V2DI_V2DI_INT:
24671 case V2DF_FTYPE_V2DF_V2DF_INT:
24673 nargs_constant = 1;
24675 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24678 nargs_constant = 1;
24680 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24683 nargs_constant = 1;
/* Insert forms with two trailing immediates.  */
24685 case V2DI_FTYPE_V2DI_UINT_UINT:
24687 nargs_constant = 2;
24689 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24691 nargs_constant = 2;
24694 gcc_unreachable ();
24697 gcc_assert (nargs <= ARRAY_SIZE (args));
24699 if (comparison != UNKNOWN)
24701 gcc_assert (nargs == 2);
24702 return ix86_expand_sse_compare (d, exp, target, swap);
/* Allocate the result register.  When rmode differs from tmode, build
   the insn target as a subreg view of the real result register.  */
24705 if (rmode == VOIDmode || rmode == tmode)
24709 || GET_MODE (target) != tmode
24710 || ! (*insn_p->operand[0].predicate) (target, tmode))
24711 target = gen_reg_rtx (tmode);
24712 real_target = target;
24716 target = gen_reg_rtx (rmode);
24717 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24720 for (i = 0; i < nargs; i++)
24722 tree arg = CALL_EXPR_ARG (exp, i);
24723 rtx op = expand_normal (arg);
24724 enum machine_mode mode = insn_p->operand[i + 1].mode;
24725 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24727 if (last_arg_count && (i + 1) == nargs)
24729 /* SIMD shift insns take either an 8-bit immediate or
24730 register as count. But builtin functions take int as
24731 count. If count doesn't match, we put it in register. */
24734 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24735 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24736 op = copy_to_reg (op);
/* This argument position is required to be an immediate; emit an
   icode-specific diagnostic describing its width.  */
24739 else if ((nargs - i) <= nargs_constant)
24744 case CODE_FOR_sse4_1_roundpd:
24745 case CODE_FOR_sse4_1_roundps:
24746 case CODE_FOR_sse4_1_roundsd:
24747 case CODE_FOR_sse4_1_roundss:
24748 case CODE_FOR_sse4_1_blendps:
24749 case CODE_FOR_avx_blendpd256:
24750 case CODE_FOR_avx_vpermilv4df:
24751 case CODE_FOR_avx_roundpd256:
24752 case CODE_FOR_avx_roundps256:
24753 error ("the last argument must be a 4-bit immediate");
24756 case CODE_FOR_sse4_1_blendpd:
24757 case CODE_FOR_avx_vpermilv2df:
24758 error ("the last argument must be a 2-bit immediate");
24761 case CODE_FOR_avx_vextractf128v4df:
24762 case CODE_FOR_avx_vextractf128v8sf:
24763 case CODE_FOR_avx_vextractf128v8si:
24764 case CODE_FOR_avx_vinsertf128v4df:
24765 case CODE_FOR_avx_vinsertf128v8sf:
24766 case CODE_FOR_avx_vinsertf128v8si:
24767 error ("the last argument must be a 1-bit immediate")
24770 case CODE_FOR_avx_cmpsdv2df3:
24771 case CODE_FOR_avx_cmpssv4sf3:
24772 case CODE_FOR_avx_cmppdv2df3:
24773 case CODE_FOR_avx_cmppsv4sf3:
24774 case CODE_FOR_avx_cmppdv4df3:
24775 case CODE_FOR_avx_cmppsv8sf3:
24776 error ("the last argument must be a 5-bit immediate")
24780 switch (nargs_constant)
24783 if ((nargs - i) == nargs_constant)
24785 error ("the next to last argument must be an 8-bit immediate");
24789 error ("the last argument must be an 8-bit immediate");
24792 gcc_unreachable ();
24799 if (VECTOR_MODE_P (mode))
24800 op = safe_vector_operand (op, mode);
24802 /* If we aren't optimizing, only allow one memory operand to
24804 if (memory_operand (op, mode))
24807 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24809 if (optimize || !match || num_memory > 1)
24810 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register, then reinterpret via subreg.  */
24814 op = copy_to_reg (op);
24815 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24820 args[i].mode = mode;
/* Emit the pattern with the collected operands (arity dispatch).  */
24826 pat = GEN_FCN (icode) (real_target, args[0].op);
24829 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24832 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24836 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24837 args[2].op, args[3].op);
24840 gcc_unreachable ();
24850 /* Subroutine of ix86_expand_builtin to take care of special insns
24851 with variable number of operands. */
/* Expander for load/store-style builtins whose first (store) or memory
   (load) operand is a pointer.  d->flag classifies the signature into a
   load or store KLASS, an argument count, and which operand slot is the
   memory reference.  Returns 0 for stores, the loaded value otherwise.
   NOTE(review): nargs/klass assignments inside the case groups and the
   final emit lines are elided in this view.  */
24854 ix86_expand_special_args_builtin (const struct builtin_description *d,
24855 tree exp, rtx target)
24859 unsigned int i, nargs, arg_adjust, memory;
24863 enum machine_mode mode;
24865 enum insn_code icode = d->icode;
24866 bool last_arg_constant = false;
24867 const struct insn_data *insn_p = &insn_data[icode];
24868 enum machine_mode tmode = insn_p->operand[0].mode;
24869 enum { load, store } klass;
24871 switch ((enum ix86_special_builtin_type) d->flag)
24873 case VOID_FTYPE_VOID:
24874 emit_insn (GEN_FCN (icode) (target));
/* Loads through a pointer argument.  */
24876 case V2DI_FTYPE_PV2DI:
24877 case V32QI_FTYPE_PCCHAR:
24878 case V16QI_FTYPE_PCCHAR:
24879 case V8SF_FTYPE_PCV4SF:
24880 case V8SF_FTYPE_PCFLOAT:
24881 case V4SF_FTYPE_PCFLOAT:
24882 case V4DF_FTYPE_PCV2DF:
24883 case V4DF_FTYPE_PCDOUBLE:
24884 case V2DF_FTYPE_PCDOUBLE:
/* Stores through a pointer first argument.  */
24889 case VOID_FTYPE_PV2SF_V4SF:
24890 case VOID_FTYPE_PV4DI_V4DI:
24891 case VOID_FTYPE_PV2DI_V2DI:
24892 case VOID_FTYPE_PCHAR_V32QI:
24893 case VOID_FTYPE_PCHAR_V16QI:
24894 case VOID_FTYPE_PFLOAT_V8SF:
24895 case VOID_FTYPE_PFLOAT_V4SF:
24896 case VOID_FTYPE_PDOUBLE_V4DF:
24897 case VOID_FTYPE_PDOUBLE_V2DF:
24898 case VOID_FTYPE_PDI_DI:
24899 case VOID_FTYPE_PINT_INT:
24902 /* Reserve memory operand for target. */
24903 memory = ARRAY_SIZE (args);
/* Loads combining a vector with memory (movlps-style).  */
24905 case V4SF_FTYPE_V4SF_PCV2SF:
24906 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads: memory first, mask second.  */
24911 case V8SF_FTYPE_PCV8SF_V8SF:
24912 case V4DF_FTYPE_PCV4DF_V4DF:
24913 case V4SF_FTYPE_PCV4SF_V4SF:
24914 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores.  */
24919 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24920 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24921 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24922 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24925 /* Reserve memory operand for target. */
24926 memory = ARRAY_SIZE (args);
24929 gcc_unreachable ();
24932 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the destination MEM (built from argument 0) becomes the
   insn's "target" and the value arguments follow.  */
24934 if (klass == store)
24936 arg = CALL_EXPR_ARG (exp, 0);
24937 op = expand_normal (arg);
24938 gcc_assert (target == 0);
24939 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24947 || GET_MODE (target) != tmode
24948 || ! (*insn_p->operand[0].predicate) (target, tmode))
24949 target = gen_reg_rtx (tmode);
24952 for (i = 0; i < nargs; i++)
24954 enum machine_mode mode = insn_p->operand[i + 1].mode;
24957 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24958 op = expand_normal (arg);
24959 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24961 if (last_arg_constant && (i + 1) == nargs)
24967 error ("the last argument must be an 8-bit immediate");
24975 /* This must be the memory operand. */
24976 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24977 gcc_assert (GET_MODE (op) == mode
24978 || GET_MODE (op) == VOIDmode);
24982 /* This must be register. */
24983 if (VECTOR_MODE_P (mode))
24984 op = safe_vector_operand (op, mode);
24986 gcc_assert (GET_MODE (op) == mode
24987 || GET_MODE (op) == VOIDmode);
24988 op = copy_to_mode_reg (mode, op);
24993 args[i].mode = mode;
24999 pat = GEN_FCN (icode) (target, args[0].op);
25002 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25005 gcc_unreachable ();
/* Stores have no value result.  */
25011 return klass == store ? 0 : target;
25014 /* Return the integer constant in ARG. Constrain it to be in the range
25015 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the success-path return and the post-error return value
   are elided in this view.  */
25018 get_element_number (tree vec_type, tree arg)
25020 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* host_integerp/tree_low_cst with pos=1 require a non-negative constant
   that fits in a HOST_WIDE_INT; anything else is rejected.  */
25022 if (!host_integerp (arg, 1)
25023 || (elt = tree_low_cst (arg, 1), elt > max))
25025 error ("selector must be an integer constant in the range 0..%wi", max);
25032 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25033 ix86_expand_vector_init. We DO have language-level syntax for this, in
25034 the form of (type){ init-list }. Except that since we can't place emms
25035 instructions from inside the compiler, we can't allow the use of MMX
25036 registers unless the user explicitly asks for it. So we do *not* define
25037 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25038 we have builtins invoked by mmintrin.h that gives us license to emit
25039 these sorts of instructions. */
/* NOTE(review): the trailing `return target;' is elided in this view.  */
25042 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25044 enum machine_mode tmode = TYPE_MODE (type);
25045 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25046 int i, n_elt = GET_MODE_NUNITS (tmode);
25047 rtvec v = rtvec_alloc (n_elt);
25049 gcc_assert (VECTOR_MODE_P (tmode));
/* One call argument per vector element, by construction of the builtin.  */
25050 gcc_assert (call_expr_nargs (exp) == n_elt);
25052 for (i = 0; i < n_elt; ++i)
25054 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25055 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25058 if (!target || !register_operand (target, tmode))
25059 target = gen_reg_rtx (tmode);
25061 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25065 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25066 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25067 had a language-level syntax for referencing vector elements. */
/* NOTE(review): excerpt has source-line gaps (return type, local
   declarations for arg0/arg1/op0/elt, braces, final return).  Code kept
   verbatim; comments only added.  */
25070 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25072 enum machine_mode tmode, mode0;
25077 arg0 = CALL_EXPR_ARG (exp, 0);
25078 arg1 = CALL_EXPR_ARG (exp, 1);
25080 op0 = expand_normal (arg0);
/* ARG1 is the element selector; get_element_number range-checks it
   against the vector type of ARG0.  */
25081 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the full vector mode of ARG0.  */
25083 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25084 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25085 gcc_assert (VECTOR_MODE_P (mode0));
25087 op0 = force_reg (mode0, op0);
25089 if (optimize || !target || !register_operand (target, tmode))
25090 target = gen_reg_rtx (tmode);
25092 ix86_expand_vector_extract (true, target, op0, elt);
25097 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25098 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25099 a language-level syntax for referencing vector elements. */
/* NOTE(review): excerpt has source-line gaps (return type, the ELT
   declaration, braces, final "return target").  Code kept verbatim;
   comments only added.  */
25102 ix86_expand_vec_set_builtin (tree exp)
25104 enum machine_mode tmode, mode1;
25105 tree arg0, arg1, arg2;
25107 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = selector.  */
25109 arg0 = CALL_EXPR_ARG (exp, 0);
25110 arg1 = CALL_EXPR_ARG (exp, 1);
25111 arg2 = CALL_EXPR_ARG (exp, 2);
25113 tmode = TYPE_MODE (TREE_TYPE (arg0));
25114 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25115 gcc_assert (VECTOR_MODE_P (tmode));
25117 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25118 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25119 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the element value to the element mode if expansion
   produced it in some other (non-VOID) mode.  */
25121 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25122 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25124 op0 = force_reg (tmode, op0);
25125 op1 = force_reg (mode1, op1);
25127 /* OP0 is the source of these builtin functions and shouldn't be
25128 modified. Create a copy, use it and return it as target. */
25129 target = gen_reg_rtx (tmode);
25130 emit_move_insn (target, op0);
25131 ix86_expand_vector_set (true, target, op1, elt);
25136 /* Expand an expression EXP that calls a built-in function,
25137 with result going to TARGET if that's convenient
25138 (and in mode MODE if that's convenient).
25139 SUBTARGET may be used as the target for computing one of EXP's operands.
25140 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this excerpt has many source-line gaps -- the return type,
   braces, "switch (fcode)", several declarations (i, elt, tmp), break/return
   statements and error paths are missing between the numbered lines.  The
   code lines present are kept byte-identical; only comments were added.  */
25143 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25144 enum machine_mode mode ATTRIBUTE_UNUSED,
25145 int ignore ATTRIBUTE_UNUSED)
25147 const struct builtin_description *d;
25149 enum insn_code icode;
25150 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25151 tree arg0, arg1, arg2;
25152 rtx op0, op1, op2, pat;
25153 enum machine_mode mode0, mode1, mode2;
25154 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25156 /* Determine whether the builtin function is available under the current ISA.
25157 Originally the builtin was not created if it wasn't applicable to the
25158 current ISA based on the command line switches. With function specific
25159 options, we need to check in the context of the function making the call
25160 whether it is supported. */
25161 if (ix86_builtins_isa[fcode].isa
25162 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Reconstruct the -m option string for the diagnostic below.  */
25164 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25165 NULL, NULL, false);
25168 error ("%qE needs unknown isa option", fndecl);
25171 gcc_assert (opts != NULL);
25172 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins with irregular operand handling are expanded ad hoc here;
   table-driven builtins are handled by the bdesc_* loops at the bottom.  */
25180 case IX86_BUILTIN_MASKMOVQ:
25181 case IX86_BUILTIN_MASKMOVDQU:
25182 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25183 ? CODE_FOR_mmx_maskmovq
25184 : CODE_FOR_sse2_maskmovdqu);
25185 /* Note the arg order is different from the operand order. */
25186 arg1 = CALL_EXPR_ARG (exp, 0);
25187 arg2 = CALL_EXPR_ARG (exp, 1);
25188 arg0 = CALL_EXPR_ARG (exp, 2);
25189 op0 = expand_normal (arg0);
25190 op1 = expand_normal (arg1);
25191 op2 = expand_normal (arg2);
25192 mode0 = insn_data[icode].operand[0].mode;
25193 mode1 = insn_data[icode].operand[1].mode;
25194 mode2 = insn_data[icode].operand[2].mode;
/* Operand 0 is the destination address; wrap it in a MEM of the
   vector mode.  */
25196 op0 = force_reg (Pmode, op0);
25197 op0 = gen_rtx_MEM (mode1, op0);
25199 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25200 op0 = copy_to_mode_reg (mode0, op0);
25201 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25202 op1 = copy_to_mode_reg (mode1, op1);
25203 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25204 op2 = copy_to_mode_reg (mode2, op2);
25205 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR loads the SSE control/status register from a stack slot.  */
25211 case IX86_BUILTIN_LDMXCSR:
25212 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25213 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25214 emit_move_insn (target, op0);
25215 emit_insn (gen_sse_ldmxcsr (target));
/* STMXCSR stores MXCSR to a stack slot, then reads it back.  */
25218 case IX86_BUILTIN_STMXCSR:
25219 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25220 emit_insn (gen_sse_stmxcsr (target));
25221 return copy_to_mode_reg (SImode, target);
25223 case IX86_BUILTIN_CLFLUSH:
25224 arg0 = CALL_EXPR_ARG (exp, 0);
25225 op0 = expand_normal (arg0);
25226 icode = CODE_FOR_sse2_clflush;
25227 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25228 op0 = copy_to_mode_reg (Pmode, op0);
25230 emit_insn (gen_sse2_clflush (op0));
/* MONITOR takes a pointer plus two SImode hint operands.  */
25233 case IX86_BUILTIN_MONITOR:
25234 arg0 = CALL_EXPR_ARG (exp, 0);
25235 arg1 = CALL_EXPR_ARG (exp, 1);
25236 arg2 = CALL_EXPR_ARG (exp, 2);
25237 op0 = expand_normal (arg0);
25238 op1 = expand_normal (arg1);
25239 op2 = expand_normal (arg2);
25241 op0 = copy_to_mode_reg (Pmode, op0);
25243 op1 = copy_to_mode_reg (SImode, op1);
25245 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the SI/DI variant per target word size.  */
25246 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25249 case IX86_BUILTIN_MWAIT:
25250 arg0 = CALL_EXPR_ARG (exp, 0);
25251 arg1 = CALL_EXPR_ARG (exp, 1);
25252 op0 = expand_normal (arg0);
25253 op1 = expand_normal (arg1);
25255 op0 = copy_to_mode_reg (SImode, op0);
25257 op1 = copy_to_mode_reg (SImode, op1);
25258 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vector init/extract/set go through the wrappers above.  */
25261 case IX86_BUILTIN_VEC_INIT_V2SI:
25262 case IX86_BUILTIN_VEC_INIT_V4HI:
25263 case IX86_BUILTIN_VEC_INIT_V8QI:
25264 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25266 case IX86_BUILTIN_VEC_EXT_V2DF:
25267 case IX86_BUILTIN_VEC_EXT_V2DI:
25268 case IX86_BUILTIN_VEC_EXT_V4SF:
25269 case IX86_BUILTIN_VEC_EXT_V4SI:
25270 case IX86_BUILTIN_VEC_EXT_V8HI:
25271 case IX86_BUILTIN_VEC_EXT_V2SI:
25272 case IX86_BUILTIN_VEC_EXT_V4HI:
25273 case IX86_BUILTIN_VEC_EXT_V16QI:
25274 return ix86_expand_vec_ext_builtin (exp, target);
25276 case IX86_BUILTIN_VEC_SET_V2DI:
25277 case IX86_BUILTIN_VEC_SET_V4SF:
25278 case IX86_BUILTIN_VEC_SET_V4SI:
25279 case IX86_BUILTIN_VEC_SET_V8HI:
25280 case IX86_BUILTIN_VEC_SET_V4HI:
25281 case IX86_BUILTIN_VEC_SET_V16QI:
25282 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant through memory.  */
25284 case IX86_BUILTIN_INFQ:
25285 case IX86_BUILTIN_HUGE_VALQ:
25287 REAL_VALUE_TYPE inf;
25291 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25293 tmp = validize_mem (force_const_mem (mode, tmp));
25296 target = gen_reg_rtx (mode);
25298 emit_move_insn (target, tmp);
/* Fall through to the table-driven builtin descriptions.  Each loop
   scans one bdesc_* array for a matching function code.  */
25306 for (i = 0, d = bdesc_special_args;
25307 i < ARRAY_SIZE (bdesc_special_args);
25309 if (d->code == fcode)
25310 return ix86_expand_special_args_builtin (d, exp, target);
25312 for (i = 0, d = bdesc_args;
25313 i < ARRAY_SIZE (bdesc_args);
25315 if (d->code == fcode)
25318 case IX86_BUILTIN_FABSQ:
25319 case IX86_BUILTIN_COPYSIGNQ:
25321 /* Emit a normal call if SSE2 isn't available. */
25322 return expand_call (exp, target, ignore);
25324 return ix86_expand_args_builtin (d, exp, target);
25327 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25328 if (d->code == fcode)
25329 return ix86_expand_sse_comi (d, exp, target);
25331 for (i = 0, d = bdesc_pcmpestr;
25332 i < ARRAY_SIZE (bdesc_pcmpestr);
25334 if (d->code == fcode)
25335 return ix86_expand_sse_pcmpestr (d, exp, target);
25337 for (i = 0, d = bdesc_pcmpistr;
25338 i < ARRAY_SIZE (bdesc_pcmpistr);
25340 if (d->code == fcode)
25341 return ix86_expand_sse_pcmpistr (d, exp, target);
25343 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25344 if (d->code == fcode)
25345 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25346 (enum multi_arg_type)d->flag,
/* Every valid function code must have been handled above.  */
25349 gcc_unreachable ();
25352 /* Returns a function decl for a vectorized version of the builtin function
25353 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25354 if it is not available. */
/* NOTE(review): excerpt has source-line gaps (return type, the type_in
   parameter line, braces, "switch (fn)", break/return NULL_TREE lines).
   Code kept verbatim; comments only added.  */
25357 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25360 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
25363 if (TREE_CODE (type_out) != VECTOR_TYPE
25364 || TREE_CODE (type_in) != VECTOR_TYPE)
25367 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25368 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25369 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25370 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Each case pairs a scalar libm builtin with the SSE builtin whose
   element mode and width match exactly.  */
25374 case BUILT_IN_SQRT:
25375 if (out_mode == DFmode && out_n == 2
25376 && in_mode == DFmode && in_n == 2)
25377 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25380 case BUILT_IN_SQRTF:
25381 if (out_mode == SFmode && out_n == 4
25382 && in_mode == SFmode && in_n == 4)
25383 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25386 case BUILT_IN_LRINT:
25387 if (out_mode == SImode && out_n == 4
25388 && in_mode == DFmode && in_n == 2)
25389 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25392 case BUILT_IN_LRINTF:
25393 if (out_mode == SImode && out_n == 4
25394 && in_mode == SFmode && in_n == 4)
25395 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25402 /* Dispatch to a handler for a vectorization library. */
25403 if (ix86_veclib_handler)
25404 return (*ix86_veclib_handler)(fn, type_out, type_in);
25409 /* Handler for an SVML-style interface to
25410 a library with vectorized intrinsics. */
/* NOTE(review): excerpt has source-line gaps (return type, the "name"
   buffer declaration, braces, "switch (fn)", NULL_TREE returns, the
   uppercase-conversion loop body, n_args counting, and the final
   "return new_fndecl").  Code kept verbatim; comments only added.  */
25413 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25416 tree fntype, new_fndecl, args;
25419 enum machine_mode el_mode, in_mode;
25422 /* The SVML is suitable for unsafe math only. */
25423 if (!flag_unsafe_math_optimizations)
25426 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25427 n = TYPE_VECTOR_SUBPARTS (type_out);
25428 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25429 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/width must agree.  */
25430 if (el_mode != in_mode
/* Double-precision SVML entry points: require 2 x DFmode.  */
25438 case BUILT_IN_LOG10:
25440 case BUILT_IN_TANH:
25442 case BUILT_IN_ATAN:
25443 case BUILT_IN_ATAN2:
25444 case BUILT_IN_ATANH:
25445 case BUILT_IN_CBRT:
25446 case BUILT_IN_SINH:
25448 case BUILT_IN_ASINH:
25449 case BUILT_IN_ASIN:
25450 case BUILT_IN_COSH:
25452 case BUILT_IN_ACOSH:
25453 case BUILT_IN_ACOS:
25454 if (el_mode != DFmode || n != 2)
/* Single-precision SVML entry points: require 4 x SFmode.  */
25458 case BUILT_IN_EXPF:
25459 case BUILT_IN_LOGF:
25460 case BUILT_IN_LOG10F:
25461 case BUILT_IN_POWF:
25462 case BUILT_IN_TANHF:
25463 case BUILT_IN_TANF:
25464 case BUILT_IN_ATANF:
25465 case BUILT_IN_ATAN2F:
25466 case BUILT_IN_ATANHF:
25467 case BUILT_IN_CBRTF:
25468 case BUILT_IN_SINHF:
25469 case BUILT_IN_SINF:
25470 case BUILT_IN_ASINHF:
25471 case BUILT_IN_ASINF:
25472 case BUILT_IN_COSHF:
25473 case BUILT_IN_COSF:
25474 case BUILT_IN_ACOSHF:
25475 case BUILT_IN_ACOSF:
25476 if (el_mode != SFmode || n != 4)
/* Build the SVML symbol name from the builtin's "__builtin_xxx" name
   (bname+10 skips that prefix); log gets irregular "Ln" spelling.  */
25484 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25486 if (fn == BUILT_IN_LOGF)
25487 strcpy (name, "vmlsLn4");
25488 else if (fn == BUILT_IN_LOG)
25489 strcpy (name, "vmldLn2");
25492 sprintf (name, "vmls%s", bname+10);
25493 name[strlen (name)-1] = '4';
25496 sprintf (name, "vmld%s2", bname+10);
25498 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type below.  */
25502 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25503 args = TREE_CHAIN (args))
25507 fntype = build_function_type_list (type_out, type_in, NULL);
25509 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25511 /* Build a function declaration for the vectorized function. */
25512 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25513 TREE_PUBLIC (new_fndecl) = 1;
25514 DECL_EXTERNAL (new_fndecl) = 1;
25515 DECL_IS_NOVOPS (new_fndecl) = 1;
25516 TREE_READONLY (new_fndecl) = 1;
25521 /* Handler for an ACML-style interface to
25522 a library with vectorized intrinsics. */
/* NOTE(review): excerpt has source-line gaps (return type, braces,
   "switch (fn)", NULL_TREE returns, n_args counting, and the final
   "return new_fndecl").  Code kept verbatim; comments only added.  */
25525 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template name: dots at [4..5] are overwritten with the mode/width
   suffix, the tail after '_' with the function name (see sprintf below).  */
25527 char name[20] = "__vr.._";
25528 tree fntype, new_fndecl, args;
25531 enum machine_mode el_mode, in_mode;
25534 /* The ACML is 64bits only and suitable for unsafe math only as
25535 it does not correctly support parts of IEEE with the required
25536 precision such as denormals. */
25538 || !flag_unsafe_math_optimizations)
25541 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25542 n = TYPE_VECTOR_SUBPARTS (type_out);
25543 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25544 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25545 if (el_mode != in_mode
/* Double-precision ACML entry points.  */
25555 case BUILT_IN_LOG2:
25556 case BUILT_IN_LOG10:
25559 if (el_mode != DFmode
/* Single-precision ACML entry points.  */
25564 case BUILT_IN_SINF:
25565 case BUILT_IN_COSF:
25566 case BUILT_IN_EXPF:
25567 case BUILT_IN_POWF:
25568 case BUILT_IN_LOGF:
25569 case BUILT_IN_LOG2F:
25570 case BUILT_IN_LOG10F:
25573 if (el_mode != SFmode
/* Append the builtin's name minus its "__builtin_" prefix (bname+10).  */
25582 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25583 sprintf (name + 7, "%s", bname+10);
25586 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25587 args = TREE_CHAIN (args))
25591 fntype = build_function_type_list (type_out, type_in, NULL);
25593 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25595 /* Build a function declaration for the vectorized function. */
25596 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25597 TREE_PUBLIC (new_fndecl) = 1;
25598 DECL_EXTERNAL (new_fndecl) = 1;
25599 DECL_IS_NOVOPS (new_fndecl) = 1;
25600 TREE_READONLY (new_fndecl) = 1;
25606 /* Returns a decl of a function that implements conversion of an integer vector
25607 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25608 side of the conversion.
25609 Return NULL_TREE if it is not available. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   outer "switch (code)" with its FLOAT_EXPR case label, and the default
   NULL_TREE returns).  Code kept verbatim; comments only added.  */
25612 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25614 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction (visible arm): V4SI maps to cvtdq2ps.  */
25620 switch (TYPE_MODE (type))
25623 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int with truncation: V4SI side maps to cvttps2dq.  */
25628 case FIX_TRUNC_EXPR:
25629 switch (TYPE_MODE (type))
25632 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25642 /* Returns a code for a target-specific builtin that implements
25643 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "if (md_fn) switch (fn)" dispatch structure, and default NULL_TREE
   returns).  Code kept verbatim; comments only added.  */
25646 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25647 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are valid only for fast, finite,
   non-trapping SSE math, and not when optimizing for size.  */
25649 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25650 && flag_finite_math_only && !flag_trapping_math
25651 && flag_unsafe_math_optimizations))
25655 /* Machine dependent builtins. */
25658 /* Vectorized version of sqrt to rsqrt conversion. */
25659 case IX86_BUILTIN_SQRTPS_NR:
25660 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25666 /* Normal builtins. */
25669 /* Sqrt to rsqrt conversion. */
25670 case BUILT_IN_SQRTF:
25671 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25678 /* Store OPERAND to the memory after reload is completed. This means
25679 that we can't easily use assign_stack_local. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "switch (mode)" structure with its case labels, emit_insn wrappers
   around the gen_rtx_SET forms, and the final return).  Code kept
   verbatim; comments only added.  */
25681 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25685 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
25686 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25688 result = gen_rtx_MEM (mode,
25689 gen_rtx_PLUS (Pmode,
25691 GEN_INT (-RED_ZONE_SIZE)));
25692 emit_move_insn (result, operand);
/* Otherwise push the value; 64-bit pushes operate in DImode.  */
25694 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25700 operand = gen_lowpart (DImode, operand);
25704 gen_rtx_SET (VOIDmode,
25705 gen_rtx_MEM (DImode,
25706 gen_rtx_PRE_DEC (DImode,
25707 stack_pointer_rtx)),
25711 gcc_unreachable ();
25713 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode path: push the two SImode halves separately.  */
25722 split_di (&operand, 1, operands, operands + 1);
25724 gen_rtx_SET (VOIDmode,
25725 gen_rtx_MEM (SImode,
25726 gen_rtx_PRE_DEC (Pmode,
25727 stack_pointer_rtx)),
25730 gen_rtx_SET (VOIDmode,
25731 gen_rtx_MEM (SImode,
25732 gen_rtx_PRE_DEC (Pmode,
25733 stack_pointer_rtx)),
25738 /* Store HImodes as SImodes. */
25739 operand = gen_lowpart (SImode, operand);
25743 gen_rtx_SET (VOIDmode,
25744 gen_rtx_MEM (GET_MODE (operand),
25745 gen_rtx_PRE_DEC (SImode,
25746 stack_pointer_rtx)),
25750 gcc_unreachable ();
25752 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25757 /* Free operand from the memory. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "int size" computation the branches assign to, and the GEN_INT (size)
   argument closing the PLUS).  Code kept verbatim; comments only added.  */
25759 ix86_free_from_memory (enum machine_mode mode)
/* Nothing to do when ix86_force_to_memory used the red zone -- the
   stack pointer was never adjusted in that case.  */
25761 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25765 if (mode == DImode || TARGET_64BIT)
25769 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25770 to pop or add instruction if registers are available. */
25771 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25772 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25777 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25778 QImode must go into class Q_REGS.
25779 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25780 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, and
   several return statements such as "return NO_REGS" / "return regclass").
   Code kept verbatim; comments only added.  */
25782 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25784 enum machine_mode mode = GET_MODE (x);
25786 /* We're only allowed to return a subclass of CLASS. Many of the
25787 following checks fail for NO_REGS, so eliminate that early. */
25788 if (regclass == NO_REGS)
25791 /* All classes can load zeros. */
25792 if (x == CONST0_RTX (mode))
25795 /* Force constants into memory if we are loading a (nonzero) constant into
25796 an MMX or SSE register. This is because there are no MMX/SSE instructions
25797 to load from a constant. */
25799 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25802 /* Prefer SSE regs only, if we can use them for math. */
25803 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25804 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25806 /* Floating-point constants need more complex checks. */
25807 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25809 /* General regs can load everything. */
25810 if (reg_class_subset_p (regclass, GENERAL_REGS))
25813 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25814 zero above. We only want to wind up preferring 80387 registers if
25815 we plan on doing computation with them. */
25817 && standard_80387_constant_p (x))
25819 /* Limit class to non-sse. */
25820 if (regclass == FLOAT_SSE_REGS)
25822 if (regclass == FP_TOP_SSE_REGS)
25824 if (regclass == FP_SECOND_SSE_REGS)
25825 return FP_SECOND_REG;
25826 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25833 /* Generally when we see PLUS here, it's the function invariant
25834 (plus soft-fp const_int). Which can only be computed into general
25836 if (GET_CODE (x) == PLUS)
25837 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25839 /* QImode constants are easy to load, but non-constant QImode data
25840 must go into Q_REGS. */
25841 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25843 if (reg_class_subset_p (regclass, Q_REGS))
25845 if (reg_class_subset_p (Q_REGS, regclass))
25853 /* Discourage putting floating-point values in SSE registers unless
25854 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, some
   return statements, and the final "return regclass").  Code kept
   verbatim; comments only added.  */
25856 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25858 enum machine_mode mode = GET_MODE (x);
25860 /* Restrict the output reload class to the register bank that we are doing
25861 math on. If we would like not to return a subset of CLASS, reject this
25862 alternative: if reload cannot do this, it will still use its choice. */
/* Redundant re-read of MODE (already initialized above); kept as-is.  */
25863 mode = GET_MODE (x);
25864 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25865 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25867 if (X87_FLOAT_MODE_P (mode))
25869 if (regclass == FP_TOP_SSE_REGS)
25871 else if (regclass == FP_SECOND_SSE_REGS)
25872 return FP_SECOND_REG;
25874 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook TARGET_SECONDARY_RELOAD.  Decide whether a reload of X in
   RCLASS needs an intermediate register class.
   NOTE(review): excerpt has source-line gaps (braces, the REGNO
   declaration and its REG_P initialization, the "return Q_REGS" for the
   in-memory case, and the final "return NO_REGS").  Code kept verbatim;
   comments only added.  */
25880 static enum reg_class
25881 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25882 enum machine_mode mode,
25883 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25885 /* QImode spills from non-QI registers require
25886 intermediate register on 32bit targets. */
25887 if (!in_p && mode == QImode && !TARGET_64BIT
25888 && (rclass == GENERAL_REGS
25889 || rclass == LEGACY_REGS
25890 || rclass == INDEX_REGS))
/* Resolve pseudos/subregs to a hard register number if possible.  */
25899 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25900 regno = true_regnum (x);
25902 /* Return Q_REGS if the operand is in memory. */
25910 /* If we are copying between general and FP registers, we need a memory
25911 location. The same is true for SSE and MMX registers.
25913 To optimize register_move_cost performance, allow inline variant.
25915 The macro can't work reliably when one of the CLASSES is class containing
25916 registers from multiple units (SSE, MMX, integer). We avoid this by never
25917 combining those units in single alternative in the machine description.
25918 Ensure that this constraint holds to avoid unexpected surprises.
25920 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25921 enforce these sanity checks. */
/* NOTE(review): excerpt has source-line gaps ("static inline int" line,
   braces, the true/false return statements, and the final return).
   Code kept verbatim; comments only added.  */
25924 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25925 enum machine_mode mode, int strict)
/* Sanity check: mixed-unit classes are not expected here (see comment
   above); tolerated only when !STRICT.  */
25927 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25928 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25929 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25930 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25931 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25932 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25934 gcc_assert (!strict);
/* x87 <-> anything-else always goes through memory.  */
25938 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25941 /* ??? This is a lie. We do have moves between mmx/general, and for
25942 mmx/sse2. But by saying we need secondary memory we discourage the
25943 register allocator from using the mmx registers unless needed. */
25944 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25947 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25949 /* SSE1 doesn't have any direct moves from other classes. */
25953 /* If the target says that inter-unit moves are more expensive
25954 than moving through memory, then don't generate them. */
25955 if (!TARGET_INTER_UNIT_MOVES)
25958 /* Between SSE and general, we have moves no larger than word size. */
25959 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED macro.
   NOTE(review): the return-type line and braces are missing from this
   excerpt.  */
25967 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25968 enum machine_mode mode, int strict)
25970 return inline_secondary_memory_needed (class1, class2, mode, strict);
25973 /* Return true if the registers in CLASS cannot represent the change from
25974 modes FROM to TO. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "if (from == to) return false" fast path, and several return
   statements).  Code kept verbatim; comments only added.  */
25977 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25978 enum reg_class regclass)
25983 /* x87 registers can't do subreg at all, as all values are reformatted
25984 to extended precision. */
25985 if (MAYBE_FLOAT_CLASS_P (regclass))
25988 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25990 /* Vector registers do not support QI or HImode loads. If we don't
25991 disallow a change to these modes, reload will assume it's ok to
25992 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25993 the vec_dupv4hi pattern. */
25994 if (GET_MODE_SIZE (from) < 4)
25997 /* Vector registers do not support subreg with nonzero offsets, which
25998 are otherwise valid for integer registers. Since we can't see
25999 whether we have a nonzero offset from here, prohibit all
26000 nonparadoxical subregs changing size. */
26001 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26008 /* Return the cost of moving data of mode M between a
26009 register and memory. A value of 2 is the default; this cost is
26010 relative to those in `REGISTER_MOVE_COST'.
26012 This function is used extensively by register_move_cost that is used to
26013 build tables at startup. Make it inline in this case.
26014 When IN is 2, return maximum of in and out move cost.
26016 If moving between registers and memory is more expensive than
26017 between two registers, you should define this macro to express the
26020 Model also increased moving costs of QImode registers in non
26024 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): excerpt has source-line gaps ("static inline int" line,
   the IN parameter line, braces, the MODE_INDEX-based switch statements
   that compute INDEX, the "if (in == 2)" guards, and several case labels
   and returns).  Code kept verbatim; comments only added.  */
/* x87 classes: cost table is indexed by SF/DF/XF slot.  */
26028 if (FLOAT_CLASS_P (regclass))
26046 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26047 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: cost table indexed by operand size bucket.  */
26049 if (SSE_CLASS_P (regclass))
26052 switch (GET_MODE_SIZE (mode))
26067 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26068 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: cost table indexed by operand size bucket.  */
26070 if (MMX_CLASS_P (regclass))
26073 switch (GET_MODE_SIZE (mode))
26085 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26086 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers, bucketed by mode size.  */
26088 switch (GET_MODE_SIZE (mode))
/* Byte moves: cheap if the class has QI parts or we are 64-bit;
   otherwise model the partial-register-stall workaround (movzbl).  */
26091 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26094 return ix86_cost->int_store[0];
26095 if (TARGET_PARTIAL_REG_DEPENDENCY
26096 && optimize_function_for_speed_p (cfun))
26097 cost = ix86_cost->movzbl_load;
26099 cost = ix86_cost->int_load[0];
26101 return MAX (cost, ix86_cost->int_store[0]);
26107 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26109 return ix86_cost->movzbl_load;
26111 return ix86_cost->int_store[0] + 4;
26116 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26117 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26119 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26120 if (mode == TFmode)
26123 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26125 cost = ix86_cost->int_load[2];
26127 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
26128 return (cost * (((int) GET_MODE_SIZE (mode)
26129 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for inline_memory_move_cost; entry point for the
   MEMORY_MOVE_COST macro.
   NOTE(review): return-type line and braces are missing from this
   excerpt.  */
26134 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26136 return inline_memory_move_cost (mode, regclass, in);
26140 /* Return the cost of moving data from a register in class CLASS1 to
26141 one in class CLASS2.
26143 It is not required that the cost always equal 2 when FROM is the same as TO;
26144 on some machines it is expensive to move between registers if they are not
26145 general registers. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "int cost" declaration and its base value, several return statements,
   and the fallback "return 2").  Code kept verbatim; comments only
   added.  */
26148 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26149 enum reg_class class2)
26151 /* In case we require secondary memory, compute cost of the store followed
26152 by load. In order to avoid bad register allocation choices, we need
26153 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26155 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks for MAX (load, store) in both directions.  */
26159 cost += inline_memory_move_cost (mode, class1, 2);
26160 cost += inline_memory_move_cost (mode, class2, 2);
26162 /* In case of copying from general_purpose_register we may emit multiple
26163 stores followed by single load causing memory size mismatch stall.
26164 Count this as arbitrarily high cost of 20. */
26165 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26168 /* In the case of FP/MMX moves, the registers actually overlap, and we
26169 have to switch modes in order to treat them differently. */
26170 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26171 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26177 /* Moves between SSE/MMX and integer unit are expensive. */
26178 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26179 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26181 /* ??? By keeping returned value relatively high, we limit the number
26182 of moves between integer and MMX/SSE registers for all targets.
26183 Additionally, high value prevents problem with x86_modes_tieable_p(),
26184 where integer modes in MMX/SSE registers are not tieable
26185 because of missing QImode and HImode moves to, from or between
26186 MMX/SSE registers. */
26187 return MAX (8, ix86_cost->mmxsse_to_integer);
26189 if (MAYBE_FLOAT_CLASS_P (class1))
26190 return ix86_cost->fp_move;
26191 if (MAYBE_SSE_CLASS_P (class1))
26192 return ix86_cost->sse_move;
26193 if (MAYBE_MMX_CLASS_P (class1))
26194 return ix86_cost->mmx_move;
26198 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, and
   several return 0/1 statements).  Code kept verbatim; comments only
   added.  */
26201 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26203 /* Flags and only flags can only hold CCmode values. */
26204 if (CC_REGNO_P (regno))
26205 return GET_MODE_CLASS (mode) == MODE_CC;
26206 if (GET_MODE_CLASS (mode) == MODE_CC
26207 || GET_MODE_CLASS (mode) == MODE_RANDOM
26208 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26210 if (FP_REGNO_P (regno))
26211 return VALID_FP_MODE_P (mode);
26212 if (SSE_REGNO_P (regno))
26214 /* We implement the move patterns for all vector modes into and
26215 out of SSE registers, even when no operation instructions
26216 are available. OImode move is available only when AVX is
26218 return ((TARGET_AVX && mode == OImode)
26219 || VALID_AVX256_REG_MODE (mode)
26220 || VALID_SSE_REG_MODE (mode)
26221 || VALID_SSE2_REG_MODE (mode)
26222 || VALID_MMX_REG_MODE (mode)
26223 || VALID_MMX_REG_MODE_3DNOW (mode));
26225 if (MMX_REGNO_P (regno))
26227 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26228 so if the register is available at all, then we can move data of
26229 the given mode into or out of it. */
26230 return (VALID_MMX_REG_MODE (mode)
26231 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining registers are the general-purpose set.  */
26234 if (mode == QImode)
26236 /* Take care for QImode values - they can be in non-QI regs,
26237 but then they do cause partial register stalls. */
/* a/b/c/d always have QI parts; on 64-bit every GPR does.  */
26238 if (regno <= BX_REG || TARGET_64BIT)
26240 if (!TARGET_PARTIAL_REG_STALL)
26242 return reload_in_progress || reload_completed;
26244 /* We handle both integer and floats in the general purpose registers. */
26245 else if (VALID_INT_MODE_P (mode))
26247 else if (VALID_FP_MODE_P (mode))
26249 else if (VALID_DFP_MODE_P (mode))
26251 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26252 on to use that value in smaller contexts, this can easily force a
26253 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26254 supporting DImode, allow it. */
26255 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26261 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26262 tieable integer mode. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, the
   "switch (mode)" with its HI/QI/SI-etc. case labels, and the default
   return).  Code kept verbatim; comments only added.  */
26265 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Sub-word modes tie only where partial-register stalls are not a
   concern; DImode ties only on 64-bit targets.  */
26274 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26277 return TARGET_64BIT;
26284 /* Return true if MODE1 is accessible in a register that can hold MODE2
26285 without copying. That is, all register classes that can hold MODE2
26286 can also hold MODE1. */
/* NOTE(review): excerpt has source-line gaps (return type, braces, and
   the final "return false").  Code kept verbatim; comments only added.  */
26289 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26291 if (mode1 == mode2)
26294 if (ix86_tieable_integer_mode_p (mode1)
26295 && ix86_tieable_integer_mode_p (mode2))
26298 /* MODE2 being XFmode implies fp stack or general regs, which means we
26299 can tie any smaller floating point modes to it. Note that we do not
26300 tie this with TFmode. */
26301 if (mode2 == XFmode)
26302 return mode1 == SFmode || mode1 == DFmode;
26304 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26305 that we can tie it with SFmode. */
26306 if (mode2 == DFmode)
26307 return mode1 == SFmode;
26309 /* If MODE2 is only appropriate for an SSE register, then tie with
26310 any other mode acceptable to SSE registers. */
26311 if (GET_MODE_SIZE (mode2) == 16
26312 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26313 return (GET_MODE_SIZE (mode1) == 16
26314 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26316 /* If MODE2 is appropriate for an MMX register, then tie
26317 with any other mode acceptable to MMX registers. */
26318 if (GET_MODE_SIZE (mode2) == 8
26319 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26320 return (GET_MODE_SIZE (mode1) == 8
26321 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26326 /* Compute a (partial) cost for rtx X. Return true if the complete
26327 cost has been computed, and false if subexpressions should be
26328 scanned. In either case, *TOTAL contains the cost result. */
26331 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26333 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26334 enum machine_mode mode = GET_MODE (x);
26335 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26343 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26345 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26347 else if (flag_pic && SYMBOLIC_CONST (x)
26349 || (!GET_CODE (x) != LABEL_REF
26350 && (GET_CODE (x) != SYMBOL_REF
26351 || !SYMBOL_REF_LOCAL_P (x)))))
26358 if (mode == VOIDmode)
26361 switch (standard_80387_constant_p (x))
26366 default: /* Other constants */
26371 /* Start with (MEM (SYMBOL_REF)), since that's where
26372 it'll probably end up. Add a penalty for size. */
26373 *total = (COSTS_N_INSNS (1)
26374 + (flag_pic != 0 && !TARGET_64BIT)
26375 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26381 /* The zero extensions is often completely free on x86_64, so make
26382 it as cheap as possible. */
26383 if (TARGET_64BIT && mode == DImode
26384 && GET_MODE (XEXP (x, 0)) == SImode)
26386 else if (TARGET_ZERO_EXTEND_WITH_AND)
26387 *total = cost->add;
26389 *total = cost->movzx;
26393 *total = cost->movsx;
26397 if (CONST_INT_P (XEXP (x, 1))
26398 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26400 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26403 *total = cost->add;
26406 if ((value == 2 || value == 3)
26407 && cost->lea <= cost->shift_const)
26409 *total = cost->lea;
26419 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26421 if (CONST_INT_P (XEXP (x, 1)))
26423 if (INTVAL (XEXP (x, 1)) > 32)
26424 *total = cost->shift_const + COSTS_N_INSNS (2);
26426 *total = cost->shift_const * 2;
26430 if (GET_CODE (XEXP (x, 1)) == AND)
26431 *total = cost->shift_var * 2;
26433 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26438 if (CONST_INT_P (XEXP (x, 1)))
26439 *total = cost->shift_const;
26441 *total = cost->shift_var;
26446 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26448 /* ??? SSE scalar cost should be used here. */
26449 *total = cost->fmul;
26452 else if (X87_FLOAT_MODE_P (mode))
26454 *total = cost->fmul;
26457 else if (FLOAT_MODE_P (mode))
26459 /* ??? SSE vector cost should be used here. */
26460 *total = cost->fmul;
26465 rtx op0 = XEXP (x, 0);
26466 rtx op1 = XEXP (x, 1);
26468 if (CONST_INT_P (XEXP (x, 1)))
26470 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26471 for (nbits = 0; value != 0; value &= value - 1)
26475 /* This is arbitrary. */
26478 /* Compute costs correctly for widening multiplication. */
26479 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26480 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26481 == GET_MODE_SIZE (mode))
26483 int is_mulwiden = 0;
26484 enum machine_mode inner_mode = GET_MODE (op0);
26486 if (GET_CODE (op0) == GET_CODE (op1))
26487 is_mulwiden = 1, op1 = XEXP (op1, 0);
26488 else if (CONST_INT_P (op1))
26490 if (GET_CODE (op0) == SIGN_EXTEND)
26491 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26494 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26498 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26501 *total = (cost->mult_init[MODE_INDEX (mode)]
26502 + nbits * cost->mult_bit
26503 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26512 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26513 /* ??? SSE cost should be used here. */
26514 *total = cost->fdiv;
26515 else if (X87_FLOAT_MODE_P (mode))
26516 *total = cost->fdiv;
26517 else if (FLOAT_MODE_P (mode))
26518 /* ??? SSE vector cost should be used here. */
26519 *total = cost->fdiv;
26521 *total = cost->divide[MODE_INDEX (mode)];
26525 if (GET_MODE_CLASS (mode) == MODE_INT
26526 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26528 if (GET_CODE (XEXP (x, 0)) == PLUS
26529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26530 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26531 && CONSTANT_P (XEXP (x, 1)))
26533 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26534 if (val == 2 || val == 4 || val == 8)
26536 *total = cost->lea;
26537 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26538 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26539 outer_code, speed);
26540 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26544 else if (GET_CODE (XEXP (x, 0)) == MULT
26545 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26547 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26548 if (val == 2 || val == 4 || val == 8)
26550 *total = cost->lea;
26551 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26552 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26556 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26558 *total = cost->lea;
26559 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26560 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26561 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26568 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26570 /* ??? SSE cost should be used here. */
26571 *total = cost->fadd;
26574 else if (X87_FLOAT_MODE_P (mode))
26576 *total = cost->fadd;
26579 else if (FLOAT_MODE_P (mode))
26581 /* ??? SSE vector cost should be used here. */
26582 *total = cost->fadd;
26590 if (!TARGET_64BIT && mode == DImode)
26592 *total = (cost->add * 2
26593 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26594 << (GET_MODE (XEXP (x, 0)) != DImode))
26595 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26596 << (GET_MODE (XEXP (x, 1)) != DImode)));
26602 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26604 /* ??? SSE cost should be used here. */
26605 *total = cost->fchs;
26608 else if (X87_FLOAT_MODE_P (mode))
26610 *total = cost->fchs;
26613 else if (FLOAT_MODE_P (mode))
26615 /* ??? SSE vector cost should be used here. */
26616 *total = cost->fchs;
26622 if (!TARGET_64BIT && mode == DImode)
26623 *total = cost->add * 2;
26625 *total = cost->add;
26629 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26630 && XEXP (XEXP (x, 0), 1) == const1_rtx
26631 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26632 && XEXP (x, 1) == const0_rtx)
26634 /* This kind of construct is implemented using test[bwl].
26635 Treat it as if we had an AND. */
26636 *total = (cost->add
26637 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26638 + rtx_cost (const1_rtx, outer_code, speed));
26644 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26649 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26650 /* ??? SSE cost should be used here. */
26651 *total = cost->fabs;
26652 else if (X87_FLOAT_MODE_P (mode))
26653 *total = cost->fabs;
26654 else if (FLOAT_MODE_P (mode))
26655 /* ??? SSE vector cost should be used here. */
26656 *total = cost->fabs;
26660 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26661 /* ??? SSE cost should be used here. */
26662 *total = cost->fsqrt;
26663 else if (X87_FLOAT_MODE_P (mode))
26664 *total = cost->fsqrt;
26665 else if (FLOAT_MODE_P (mode))
26666 /* ??? SSE vector cost should be used here. */
26667 *total = cost->fsqrt;
26671 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique LPC$/L$lz labels per emitted stub.  */
26682 static int current_machopic_label_num;
26684 /* Given a symbol name and its associated stub, write out the
26685 definition of the stub. */
/* NOTE(review): elided listing -- the TARGET_MACHO / MACHOPIC_ATOMIC
   conditionals selecting between the PIC and non-PIC emission paths
   below are not visible in this view.  */
26688 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26690 unsigned int length;
26691 char *binder_name, *symbol_name, lazy_ptr_name[32];
26692 int label = ++current_machopic_label_num;
26694 /* For 64-bit we shouldn't get here. */
26695 gcc_assert (!TARGET_64BIT);
26697 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26698 symb = (*targetm.strip_name_encoding) (symb);
26700 length = strlen (stub);
26701 binder_name = XALLOCAVEC (char, length + 32);
26702 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26704 length = strlen (symb);
26705 symbol_name = XALLOCAVEC (char, length + 32);
26706 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26708 sprintf (lazy_ptr_name, "L%d$lz", label);
26711 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26713 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26715 fprintf (file, "%s:\n", stub);
26716 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC path: materialize PC in %eax, load the lazy pointer, jump.  */
26720 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26721 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26722 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC path: jump through the lazy pointer directly.  */
26725 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26727 fprintf (file, "%s:\n", binder_name);
26731 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label)
26732 fprintf (file, "\tpushl\t%%eax\n");
26735 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26737 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initially pointing at the binder.  */
26739 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26740 fprintf (file, "%s:\n", lazy_ptr_name);
26741 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26742 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin-specific file-end hook; delegates the shared Darwin epilogue.
   NOTE(review): elided listing -- the signature line and any further
   statements of this function are not visible here.  */
26746 darwin_x86_file_end (void)
26748 darwin_file_end ();
26751 #endif /* TARGET_MACHO */
26753 /* Order the registers for register allocator. */
/* Preference order: call-clobbered GPRs, call-saved GPRs, then x87
   (first when doing x87 FP math, after SSE otherwise), SSE, REX SSE,
   MMX; remaining slots are zero-filled.  NOTE(review): the declarations
   of the loop counter and position index are elided from this view.  */
26756 x86_order_regs_for_local_alloc (void)
26761 /* First allocate the local general purpose registers. */
26762 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26763 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26764 reg_alloc_order [pos++] = i;
26766 /* Global general purpose registers. */
26767 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26768 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26769 reg_alloc_order [pos++] = i;
26771 /* x87 registers come first in case we are doing FP math
26773 if (!TARGET_SSE_MATH)
26774 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26775 reg_alloc_order [pos++] = i;
26777 /* SSE registers. */
26778 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26779 reg_alloc_order [pos++] = i;
26780 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26781 reg_alloc_order [pos++] = i;
26783 /* x87 registers. */
26784 if (TARGET_SSE_MATH)
26785 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26786 reg_alloc_order [pos++] = i;
26788 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26789 reg_alloc_order [pos++] = i;
26791 /* Initialize the rest of array as we do not allocate some registers
26793 while (pos < FIRST_PSEUDO_REGISTER)
26794 reg_alloc_order [pos++] = 0;
26797 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26798 struct attribute_spec.handler. */
/* Rejects the attribute (setting *NO_ADD_ATTRS) on non-function nodes,
   on non-64-bit targets, and when the opposite ABI attribute is already
   present.  NOTE(review): elided listing -- the TARGET_64BIT guard that
   precedes the "only available for 64-bit" warning is not visible.  */
26800 ix86_handle_abi_attribute (tree *node, tree name,
26801 tree args ATTRIBUTE_UNUSED,
26802 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26804 if (TREE_CODE (*node) != FUNCTION_TYPE
26805 && TREE_CODE (*node) != METHOD_TYPE
26806 && TREE_CODE (*node) != FIELD_DECL
26807 && TREE_CODE (*node) != TYPE_DECL)
26809 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26810 IDENTIFIER_POINTER (name));
26811 *no_add_attrs = true;
26816 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26817 IDENTIFIER_POINTER (name));
26818 *no_add_attrs = true;
26822 /* Can combine regparm with all attributes but fastcall. */
26823 if (is_attribute_p ("ms_abi", name))
26825 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26827 error ("ms_abi and sysv_abi attributes are not compatible");
26832 else if (is_attribute_p ("sysv_abi", name))
26834 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26836 error ("ms_abi and sysv_abi attributes are not compatible");
26845 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26846 struct attribute_spec.handler. */
/* Accepts the attribute only on RECORD/UNION types (or TYPE_DECLs of
   such), and rejects it when the opposite layout attribute is already
   present.  */
26848 ix86_handle_struct_attribute (tree *node, tree name,
26849 tree args ATTRIBUTE_UNUSED,
26850 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For decls, look through TYPE_DECL to the underlying type.  */
26853 if (DECL_P (*node))
26855 if (TREE_CODE (*node) == TYPE_DECL)
26856 type = &TREE_TYPE (*node);
26861 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26862 || TREE_CODE (*type) == UNION_TYPE)))
26864 warning (OPT_Wattributes, "%qs attribute ignored",
26865 IDENTIFIER_POINTER (name));
26866 *no_add_attrs = true;
26869 else if ((is_attribute_p ("ms_struct", name)
26870 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26871 || ((is_attribute_p ("gcc_struct", name)
26872 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26874 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26875 IDENTIFIER_POINTER (name));
26876 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout: either the
   target defaults to MS layout and the type does not opt out via
   "gcc_struct", or the type explicitly requests it via "ms_struct".  */
26883 ix86_ms_bitfield_layout_p (const_tree record_type)
26885 return (TARGET_MS_BITFIELD_LAYOUT &&
26886 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26887 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26890 /* Returns an expression indicating where the this parameter is
26891 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or, for aggregate returns, second)
   integer parameter register of the function's ABI.  32-bit: in a
   regparm/fastcall register when available, otherwise on the stack at
   4(%esp) (8 when an aggregate-return pointer precedes it).  */
26894 x86_this_parameter (tree function)
26896 tree type = TREE_TYPE (function);
/* AGGR is 1 when a hidden return-slot pointer occupies the first slot.  */
26897 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26902 const int *parm_regs;
26904 if (ix86_function_type_abi (type) == MS_ABI)
26905 parm_regs = x86_64_ms_abi_int_parameter_registers;
26907 parm_regs = x86_64_int_parameter_registers;
26908 return gen_rtx_REG (DImode, parm_regs[aggr]);
26911 nregs = ix86_function_regparm (type, function);
26913 if (nregs > 0 && !stdarg_p (type))
26917 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26918 regno = aggr ? DX_REG : CX_REG;
26926 return gen_rtx_MEM (SImode,
26927 plus_constant (stack_pointer_rtx, 4));
26930 return gen_rtx_REG (SImode, regno);
26933 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26936 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns false only for 32-bit thunks that would need a scratch
   register (for VCALL_OFFSET or PIC GOT access) when all three argument
   registers are taken by regparm.  */
26939 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26940 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26941 HOST_WIDE_INT vcall_offset, const_tree function)
26943 /* 64-bit can handle anything. */
26947 /* For 32-bit, everything's fine if we have one free register. */
26948 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26951 /* Need a free register for vcall_offset. */
26955 /* Need a free register for GOT references. */
26956 if (flag_pic && !(*targetm.binds_local_p) (function))
26959 /* Otherwise ok. */
26963 /* Output the assembler code for a thunk function. THUNK_DECL is the
26964 declaration for the thunk function itself, FUNCTION is the decl for
26965 the target function. DELTA is an immediate constant offset to be
26966 added to THIS. If VCALL_OFFSET is nonzero, the word at
26967 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): elided listing -- several TARGET_64BIT / delta /
   vcall_offset conditionals and braces are missing from this view; the
   retained lines are the per-case emission bodies.  */
26970 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26971 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26972 HOST_WIDE_INT vcall_offset, tree function)
26975 rtx this_param = x86_this_parameter (function);
26978 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26979 pull it in now and let DELTA benefit. */
26980 if (REG_P (this_param))
26981 this_reg = this_param;
26982 else if (vcall_offset)
26984 /* Put the this parameter into %eax. */
26985 xops[0] = this_param;
26986 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26987 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26990 this_reg = NULL_RTX;
26992 /* Adjust the this parameter by a fixed constant. */
26995 xops[0] = GEN_INT (delta);
26996 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: a delta outside the signed-32-bit range must go through a
   scratch register (%r10) before the add.  */
26999 if (!x86_64_general_operand (xops[0], DImode))
27001 tmp = gen_rtx_REG (DImode, R10_REG);
27003 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27005 xops[1] = this_param;
27007 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27010 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27013 /* Adjust the this parameter by a value stored in the vtable. */
27017 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch: %ecx, or %eax for fastcall (where %ecx carries `this').  */
27020 int tmp_regno = CX_REG;
27021 if (lookup_attribute ("fastcall",
27022 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27023 tmp_regno = AX_REG;
27024 tmp = gen_rtx_REG (SImode, tmp_regno);
27027 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27029 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27031 /* Adjust the this parameter. */
27032 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27033 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27035 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27036 xops[0] = GEN_INT (vcall_offset);
27038 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27039 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27041 xops[1] = this_reg;
27042 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27045 /* If necessary, drop THIS back to its stack slot. */
27046 if (this_reg && this_reg != this_param)
27048 xops[0] = this_reg;
27049 xops[1] = this_param;
27050 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27053 xops[0] = XEXP (DECL_RTL (function), 0);
/* Finally tail-jump to the real function, direct when it binds locally,
   otherwise through the GOT (64-bit) or a Mach-O stub / GOT load (32-bit).  */
27056 if (!flag_pic || (*targetm.binds_local_p) (function))
27057 output_asm_insn ("jmp\t%P0", xops);
27058 /* All thunks should be in the same object as their target,
27059 and thus binds_local_p should be true. */
27060 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27061 gcc_unreachable ();
27064 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27065 tmp = gen_rtx_CONST (Pmode, tmp);
27066 tmp = gen_rtx_MEM (QImode, tmp);
27068 output_asm_insn ("jmp\t%A0", xops);
27073 if (!flag_pic || (*targetm.binds_local_p) (function))
27074 output_asm_insn ("jmp\t%P0", xops);
27079 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27080 tmp = (gen_rtx_SYMBOL_REF
27082 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27083 tmp = gen_rtx_MEM (QImode, tmp);
27085 output_asm_insn ("jmp\t%0", xops);
27088 #endif /* TARGET_MACHO */
27090 tmp = gen_rtx_REG (SImode, CX_REG);
27091 output_set_got (tmp, NULL_RTX);
27094 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27095 output_asm_insn ("jmp\t{*}%1", xops);
/* File-start hook: emit the default prologue plus the x86-specific
   .version/.global __fltused/Intel-syntax directives as configured.
   NOTE(review): the TARGET_MACHO guard around darwin_file_start appears
   to be elided from this view.  */
27101 x86_file_start (void)
27103 default_file_start ();
27105 darwin_file_start ();
27107 if (X86_FILE_START_VERSION_DIRECTIVE)
27108 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27109 if (X86_FILE_START_FLTUSED)
27110 fputs ("\t.global\t__fltused\n", asm_out_file);
27111 if (ix86_asm_dialect == ASM_INTEL)
27112 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Cap the alignment of FIELD at 32 bits for double/integer-class fields
   on 32-bit targets without -malign-double, matching the traditional
   i386 struct layout; otherwise return COMPUTED unchanged.  */
27116 x86_field_alignment (tree field, int computed)
27118 enum machine_mode mode;
27119 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment.  */
27121 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27123 mode = TYPE_MODE (strip_array_types (type));
27124 if (mode == DFmode || mode == DCmode
27125 || GET_MODE_CLASS (mode) == MODE_INT
27126 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27127 return MIN (32, computed);
27131 /* Output assembler code to FILE to increment profiler label # LABELNO
27132 for profiling a function entry. */
/* Three emission paths (64-bit, 32-bit PIC, 32-bit non-PIC); the
   TARGET_64BIT / flag_pic selectors are elided from this listing.  */
27134 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27138 #ifndef NO_PROFILE_COUNTERS
27139 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27142 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27143 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27145 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27149 #ifndef NO_PROFILE_COUNTERS
27150 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27151 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27153 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27157 #ifndef NO_PROFILE_COUNTERS
27158 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27159 PROFILE_COUNT_REGISTER);
27161 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27165 /* We don't have exact information about the insn sizes, but we may assume
27166 quite safely that we are informed about all 1 byte insns and memory
27167 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded size in bytes;
   the early returns for non-insns, alignment unspecs and jump tables
   are partially elided from this listing.  */
27171 min_insn_size (rtx insn)
27175 if (!INSN_P (insn) || !active_insn_p (insn))
27178 /* Discard alignments we've emit and jump instructions. */
27179 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27180 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27183 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27184 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
27187 /* Important case - calls are always 5 bytes.
27188 It is common to have many calls in the row. */
27190 && symbolic_reference_mentioned_p (PATTERN (insn))
27191 && !SIBLING_CALL_P (insn))
27193 if (get_attr_length (insn) <= 1)
27196 /* For normal instructions we may rely on the sizes of addresses
27197 and the presence of symbol to require 4 bytes of encoding.
27198 This is not the case for jumps where references are PC relative. */
27199 if (!JUMP_P (insn))
27201 l = get_attr_length_address (insn);
27202 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27211 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pads the code so no 16-byte window contains four
   jumps, using min_insn_size as the size estimate.  */
27215 ix86_avoid_jump_misspredicts (void)
27217 rtx insn, start = get_insns ();
27218 int nbytes = 0, njumps = 0;
27221 /* Look for all minimal intervals of instructions containing 4 jumps.
27222 The intervals are bounded by START and INSN. NBYTES is the total
27223 size of instructions in the interval including INSN and not including
27224 START. When the NBYTES is smaller than 16 bytes, it is possible
27225 that the end of START and INSN ends up in the same 16byte page.
27227 The smallest offset in the page INSN can start is the case where START
27228 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27229 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
27231 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27234 nbytes += min_insn_size (insn);
27236 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27237 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only; jump tables (ADDR_VEC) are data.  */
27239 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27240 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until at most 3 jumps remain.  */
27248 start = NEXT_INSN (start);
27249 if ((JUMP_P (start)
27250 && GET_CODE (PATTERN (start)) != ADDR_VEC
27251 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27253 njumps--, isjump = 1;
27256 nbytes -= min_insn_size (start);
27258 gcc_assert (njumps >= 0);
27260 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27261 INSN_UID (start), INSN_UID (insn), nbytes);
27263 if (njumps == 3 && isjump && nbytes < 16)
27265 int padsize = 15 - nbytes + min_insn_size (insn);
27268 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27269 INSN_UID (insn), padsize);
27270 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27275 /* AMD Athlon works faster
27276 when RET is not destination of conditional jump or directly preceded
27277 by other jump instruction. We avoid the penalty by inserting NOP just
27278 before the RET instructions in such cases. */
/* Walks all predecessors of the exit block and replaces bare RETs that
   follow a label or a conditional jump with the long-return pattern.  */
27280 ix86_pad_returns (void)
27285 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27287 basic_block bb = e->src;
27288 rtx ret = BB_END (bb);
27290 bool replace = false;
27292 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27293 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the RET.  */
27295 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27296 if (active_insn_p (prev) || LABEL_P (prev))
27298 if (prev && LABEL_P (prev))
/* A labeled RET is a branch target unless reached only by fallthru.  */
27303 FOR_EACH_EDGE (e, ei, bb->preds)
27304 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27305 && !(e->flags & EDGE_FALLTHRU))
27310 prev = prev_active_insn (ret);
27312 && ((JUMP_P (prev) && any_condjump_p (prev))
27315 /* Empty functions get branch mispredict even when the jump destination
27316 is not visible to us. */
27317 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27322 emit_insn_before (gen_return_internal_long (), ret);
27328 /* Implement machine specific optimizations. We implement padding of returns
27329 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function's defining line is elided from this view;
   this is the machine-reorg hook body dispatching the two passes.  */
27333 if (TARGET_PAD_RETURNS && optimize
27334 && optimize_function_for_speed_p (cfun))
27335 ix86_pad_returns ();
27336 if (TARGET_FOUR_JUMP_LIMIT && optimize
27337 && optimize_function_for_speed_p (cfun))
27338 ix86_avoid_jump_misspredicts ();
27341 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached recog operands; any GPR operand above %ebx needs a
   REX prefix when accessed in QImode.  */
27344 x86_extended_QIreg_mentioned_p (rtx insn)
27347 extract_insn_cached (insn);
27348 for (i = 0; i < recog_data.n_operands; i++)
27349 if (REG_P (recog_data.operand[i])
27350 && REGNO (recog_data.operand[i]) > BX_REG)
27355 /* Return nonzero when P points to register encoded via REX prefix.
27356 Called via for_each_rtx. */
/* NOTE(review): the REG_P guard before the REGNO access is elided from
   this listing.  */
27358 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27360 unsigned int regno;
27363 regno = REGNO (*p);
27364 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27367 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern (or the bare rtx) with extended_reg_mentioned_1.  */
27370 x86_extended_reg_mentioned_p (rtx insn)
27372 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27373 extended_reg_mentioned_1, NULL);
27376 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27377 optabs would emit if we didn't have TFmode patterns. */
/* Negative (high-bit-set) inputs are halved with the low bit OR'ed back
   in, converted signed, then doubled -- the standard unsigned-to-float
   trick that stays within signed conversion range.  */
27380 x86_emit_floatuns (rtx operands[2])
27382 rtx neglab, donelab, i0, i1, f0, in, out;
27383 enum machine_mode mode, inmode;
27385 inmode = GET_MODE (operands[1]);
27386 gcc_assert (inmode == SImode || inmode == DImode);
27389 in = force_reg (inmode, operands[1]);
27390 mode = GET_MODE (out);
27391 neglab = gen_label_rtx ();
27392 donelab = gen_label_rtx ();
27393 f0 = gen_reg_rtx (mode);
27395 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Fast path: value fits in the signed range, convert directly.  */
27397 expand_float (out, in, 0);
27399 emit_jump_insn (gen_jump (donelab));
27402 emit_label (neglab);
27404 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27406 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27408 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27410 expand_float (f0, i0, 0);
/* out = f0 + f0 restores the halved magnitude.  */
27412 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27414 emit_label (donelab);
27417 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27418 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided listing -- the switch over MODE and most case
   labels are missing; the retained fragments are the per-mode expansion
   bodies (VEC_DUPLICATE, punpck/pshufd broadcasts, widen-and-recurse,
   and the AVX half-mode VEC_CONCAT path).  */
27421 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27422 rtx target, rtx val)
27424 enum machine_mode hmode, smode, wsmode, wvmode;
/* Direct VEC_DUPLICATE when the target supports it for this mode.  */
27439 val = force_reg (GET_MODE_INNER (mode), val);
27440 x = gen_rtx_VEC_DUPLICATE (mode, val);
27441 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27447 if (TARGET_SSE || TARGET_3DNOW_A)
27449 val = gen_lowpart (SImode, val);
27450 x = gen_rtx_TRUNCATE (HImode, val);
27451 x = gen_rtx_VEC_DUPLICATE (mode, x);
27452 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27474 /* Extend HImode to SImode using a paradoxical SUBREG. */
27475 tmp1 = gen_reg_rtx (SImode);
27476 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27477 /* Insert the SImode value as low element of V4SImode vector. */
27478 tmp2 = gen_reg_rtx (V4SImode);
27479 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27480 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27481 CONST0_RTX (V4SImode),
27483 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27484 /* Cast the V4SImode vector back to a V8HImode vector. */
27485 tmp1 = gen_reg_rtx (V8HImode);
27486 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27487 /* Duplicate the low short through the whole low SImode word. */
27488 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27489 /* Cast the V8HImode vector back to a V4SImode vector. */
27490 tmp2 = gen_reg_rtx (V4SImode);
27491 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27492 /* Replicate the low element of the V4SImode vector. */
27493 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27494 /* Cast the V2SImode back to V8HImode, and store in target. */
27495 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27506 /* Extend QImode to SImode using a paradoxical SUBREG. */
27507 tmp1 = gen_reg_rtx (SImode);
27508 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27509 /* Insert the SImode value as low element of V4SImode vector. */
27510 tmp2 = gen_reg_rtx (V4SImode);
27511 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27512 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27513 CONST0_RTX (V4SImode),
27515 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27516 /* Cast the V4SImode vector back to a V16QImode vector. */
27517 tmp1 = gen_reg_rtx (V16QImode);
27518 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27519 /* Duplicate the low byte through the whole low SImode word. */
27520 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27521 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27522 /* Cast the V16QImode vector back to a V4SImode vector. */
27523 tmp2 = gen_reg_rtx (V4SImode);
27524 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27525 /* Replicate the low element of the V4SImode vector. */
27526 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27527 /* Cast the V2SImode back to V16QImode, and store in target. */
27528 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27536 /* Replicate the value once into the next wider mode and recurse. */
27537 val = convert_modes (wsmode, smode, val, true);
27538 x = expand_simple_binop (wsmode, ASHIFT, val,
27539 GEN_INT (GET_MODE_BITSIZE (smode)),
27540 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27541 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27543 x = gen_reg_rtx (wvmode);
27544 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27545 gcc_unreachable ();
27546 emit_move_insn (target, gen_lowpart (mode, x));
/* AVX 256-bit path: broadcast into the half-width mode, then CONCAT.  */
27569 rtx tmp = gen_reg_rtx (hmode);
27570 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27571 emit_insn (gen_rtx_SET (VOIDmode, target,
27572 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27581 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27582 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): elided listing -- the switch over MODE and several case
   labels/returns are missing; the retained fragments choose between
   ix86_expand_vector_set, movq-style VEC_CONCAT, VEC_MERGE + shuffle,
   and a zero-extend-to-SImode recursion.  */
27586 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27587 rtx target, rtx var, int one_var)
27589 enum machine_mode vsimode;
27592 bool use_vector_set = false;
27597 /* For SSE4.1, we normally use vector set. But if the second
27598 element is zero and inter-unit moves are OK, we use movq
27600 use_vector_set = (TARGET_64BIT
27602 && !(TARGET_INTER_UNIT_MOVES
27608 use_vector_set = TARGET_SSE4_1;
27611 use_vector_set = TARGET_SSE2;
27614 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27621 use_vector_set = TARGET_AVX;
27624 /* Use ix86_expand_vector_set in 64bit mode only. */
27625 use_vector_set = TARGET_AVX && TARGET_64BIT;
27631 if (use_vector_set)
/* Zero the target, then insert VAR at position ONE_VAR.  */
27633 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27634 var = force_reg (GET_MODE_INNER (mode), var);
27635 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27651 var = force_reg (GET_MODE_INNER (mode), var);
27652 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27653 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* VEC_MERGE needs a hard-reg-free pseudo as the working target.  */
27658 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27659 new_target = gen_reg_rtx (mode);
27661 new_target = target;
27662 var = force_reg (GET_MODE_INNER (mode), var);
27663 x = gen_rtx_VEC_DUPLICATE (mode, var);
27664 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27665 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27668 /* We need to shuffle the value to the correct position, so
27669 create a new pseudo to store the intermediate result. */
27671 /* With SSE2, we can use the integer shuffle insns. */
27672 if (mode != V4SFmode && TARGET_SSE2)
27674 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27676 GEN_INT (one_var == 1 ? 0 : 1),
27677 GEN_INT (one_var == 2 ? 0 : 1),
27678 GEN_INT (one_var == 3 ? 0 : 1)));
27679 if (target != new_target)
27680 emit_move_insn (target, new_target);
27684 /* Otherwise convert the intermediate result to V4SFmode and
27685 use the SSE1 shuffle instructions. */
27686 if (mode != V4SFmode)
27688 tmp = gen_reg_rtx (V4SFmode);
27689 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27694 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27696 GEN_INT (one_var == 1 ? 0 : 1),
27697 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27698 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27700 if (mode != V4SFmode)
27701 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27702 else if (tmp != target)
27703 emit_move_insn (target, tmp);
27705 else if (target != new_target)
27706 emit_move_insn (target, new_target);
27711 vsimode = V4SImode;
27717 vsimode = V2SImode;
27723 /* Zero extend the variable element to SImode and recurse. */
27724 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27726 x = gen_reg_rtx (vsimode);
27727 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27729 gcc_unreachable ();
27731 emit_move_insn (target, gen_lowpart (mode, x));
27739 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27740 consisting of the values in VALS. It is known that all elements
27741 except ONE_VAR are constants. Return true if successful. */
27744 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27745 rtx target, rtx vals, int one_var)
27747 rtx var = XVECEXP (vals, 0, one_var);
27748 enum machine_mode wmode;
/* Build a copy of VALS with the variable slot zeroed, usable as a
   constant-pool vector.  */
27751 const_vec = copy_rtx (vals);
27752 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27753 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27761 /* For the two element vectors, it's just as easy to use
27762 the general case. */
27766 /* Use ix86_expand_vector_set in 64bit mode only. */
27789 /* There's no way to set one QImode entry easily. Combine
27790 the variable value with its adjacent constant value, and
27791 promote to an HImode set. */
27792 x = XVECEXP (vals, 0, one_var ^ 1);
/* one_var is the low byte of the HImode pair: shift VAR into the high
   byte and OR in the adjacent constant's low byte.  */
27795 var = convert_modes (HImode, QImode, var, true);
27796 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27797 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27798 x = GEN_INT (INTVAL (x) & 0xff);
/* Otherwise VAR stays in the low byte and the constant moves high.  */
27802 var = convert_modes (HImode, QImode, var, true);
27803 x = gen_int_mode (INTVAL (x) << 8, HImode);
27805 if (x != const0_rtx)
27806 var = expand_simple_binop (HImode, IOR, var, x, var,
27807 1, OPTAB_LIB_WIDEN);
27809 x = gen_reg_rtx (wmode);
27810 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27811 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27813 emit_move_insn (target, gen_lowpart (mode, x));
/* General case: load the constant vector, then overwrite ONE_VAR.  */
27820 emit_move_insn (target, const_vec);
27821 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27825 /* A subroutine of ix86_expand_vector_init_general. Use vector
27826 concatenate to handle the most general case: all values variable,
27827 and none identical. */
27830 ix86_expand_vector_init_concat (enum machine_mode mode,
27831 rtx target, rtx *ops, int n)
27833 enum machine_mode cmode, hmode = VOIDmode;
27834 rtx first[8], second[4];
27874 gcc_unreachable ();
/* N == 2: a single VEC_CONCAT of the two (register) operands.  */
27877 if (!register_operand (ops[1], cmode))
27878 ops[1] = force_reg (cmode, ops[1]);
27879 if (!register_operand (ops[0], cmode))
27880 ops[0] = force_reg (cmode, ops[0]);
27881 emit_insn (gen_rtx_SET (VOIDmode, target,
27882 gen_rtx_VEC_CONCAT (mode, ops[0],
27902 gcc_unreachable ();
27918 gcc_unreachable ();
27923 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-reduce: build N/2 half-width vectors from adjacent operand
   pairs, then recurse on the halves.  */
27926 for (; i > 0; i -= 2, j--)
27928 first[j] = gen_reg_rtx (cmode);
27929 v = gen_rtvec (2, ops[i - 1], ops[i]);
27930 ix86_expand_vector_init (false, first[j],
27931 gen_rtx_PARALLEL (cmode, v));
27937 gcc_assert (hmode != VOIDmode);
27938 for (i = j = 0; i < n; i += 2, j++)
27940 second[j] = gen_reg_rtx (hmode);
27941 ix86_expand_vector_init_concat (hmode, second [j],
27945 ix86_expand_vector_init_concat (mode, target, second, n);
27948 ix86_expand_vector_init_concat (mode, target, first, n);
27952 gcc_unreachable ();
27956 /* A subroutine of ix86_expand_vector_init_general. Use vector
27957 interleave to handle the most general case: all values variable,
27958 and none identical. */
27961 ix86_expand_vector_init_interleave (enum machine_mode mode,
27962 rtx target, rtx *ops, int n)
27964 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27967 rtx (*gen_load_even) (rtx, rtx, rtx);
27968 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27969 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select element-insert and unpack-low generators per vector mode.  */
27974 gen_load_even = gen_vec_setv8hi;
27975 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27976 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27977 inner_mode = HImode;
27978 first_imode = V4SImode;
27979 second_imode = V2DImode;
27980 third_imode = VOIDmode;
27983 gen_load_even = gen_vec_setv16qi;
27984 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27985 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27986 inner_mode = QImode;
27987 first_imode = V8HImode;
27988 second_imode = V4SImode;
27989 third_imode = V2DImode;
27992 gcc_unreachable ();
27995 for (i = 0; i < n; i++)
27997 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27998 op0 = gen_reg_rtx (SImode);
27999 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28001 /* Insert the SImode value as low element of V4SImode vector. */
28002 op1 = gen_reg_rtx (V4SImode);
28003 op0 = gen_rtx_VEC_MERGE (V4SImode,
28004 gen_rtx_VEC_DUPLICATE (V4SImode,
28006 CONST0_RTX (V4SImode),
28008 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28010 /* Cast the V4SImode vector back to a vector in original mode. */
28011 op0 = gen_reg_rtx (mode);
28012 emit_move_insn (op0, gen_lowpart (mode, op1));
28014 /* Load even elements into the second position. */
28015 emit_insn ((*gen_load_even) (op0,
28016 force_reg (inner_mode,
28020 /* Cast vector to FIRST_IMODE vector. */
28021 ops[i] = gen_reg_rtx (first_imode);
28022 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28025 /* Interleave low FIRST_IMODE vectors. */
28026 for (i = j = 0; i < n; i += 2, j++)
28028 op0 = gen_reg_rtx (first_imode);
28029 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28031 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28032 ops[j] = gen_reg_rtx (second_imode);
28033 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28036 /* Interleave low SECOND_IMODE vectors. */
28037 switch (second_imode)
28040 for (i = j = 0; i < n / 2; i += 2, j++)
28042 op0 = gen_reg_rtx (second_imode);
28043 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28046 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28048 ops[j] = gen_reg_rtx (third_imode);
28049 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* V4SImode case falls through to a final V2DImode interleave.  */
28051 second_imode = V2DImode;
28052 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28056 op0 = gen_reg_rtx (second_imode);
28057 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28060 /* Cast the SECOND_IMODE vector back to a vector on original
28062 emit_insn (gen_rtx_SET (VOIDmode, target,
28063 gen_lowpart (mode, op0)));
28067 gcc_unreachable ();
28071 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28072 all values variable, and none identical. */
28075 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28076 rtx target, rtx vals)
28078 rtx ops[32], op0, op1;
28079 enum machine_mode half_mode = VOIDmode;
28086 if (!mmx_ok && !TARGET_SSE)
/* Modes handled by pairwise concatenation.  */
28098 n = GET_MODE_NUNITS (mode);
28099 for (i = 0; i < n; i++)
28100 ops[i] = XVECEXP (vals, 0, i);
28101 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: build two 128-bit halves by interleave,
   then concatenate them.  */
28105 half_mode = V16QImode;
28109 half_mode = V8HImode;
28113 n = GET_MODE_NUNITS (mode);
28114 for (i = 0; i < n; i++)
28115 ops[i] = XVECEXP (vals, 0, i);
28116 op0 = gen_reg_rtx (half_mode);
28117 op1 = gen_reg_rtx (half_mode);
28118 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28120 ix86_expand_vector_init_interleave (half_mode, op1,
28121 &ops [n >> 1], n >> 2);
28122 emit_insn (gen_rtx_SET (VOIDmode, target,
28123 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28127 if (!TARGET_SSE4_1)
28135 /* Don't use ix86_expand_vector_init_interleave if we can't
28136 move from GPR to SSE register directly. */
28137 if (!TARGET_INTER_UNIT_MOVES)
28140 n = GET_MODE_NUNITS (mode);
28141 for (i = 0; i < n; i++)
28142 ops[i] = XVECEXP (vals, 0, i);
28143 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28151 gcc_unreachable ();
/* Fallback: pack elements into word_mode registers with shift/IOR,
   then assemble the vector from whole words.  */
28155 int i, j, n_elts, n_words, n_elt_per_word;
28156 enum machine_mode inner_mode;
28157 rtx words[4], shift;
28159 inner_mode = GET_MODE_INNER (mode);
28160 n_elts = GET_MODE_NUNITS (mode);
28161 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28162 n_elt_per_word = n_elts / n_words;
28163 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28165 for (i = 0; i < n_words; ++i)
28167 rtx word = NULL_RTX;
28169 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are folded in high-to-low order within each word.  */
28171 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28172 elt = convert_modes (word_mode, inner_mode, elt, true);
28178 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28179 word, 1, OPTAB_LIB_WIDEN);
28180 word = expand_simple_binop (word_mode, IOR, word, elt,
28181 word, 1, OPTAB_LIB_WIDEN);
28189 emit_move_insn (target, gen_lowpart (mode, words[0]));
28190 else if (n_words == 2)
28192 rtx tmp = gen_reg_rtx (mode);
28193 emit_clobber (tmp);
28194 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28195 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28196 emit_move_insn (target, tmp);
28198 else if (n_words == 4)
28200 rtx tmp = gen_reg_rtx (V4SImode);
28201 gcc_assert (word_mode == SImode);
28202 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28203 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28204 emit_move_insn (target, gen_lowpart (mode, tmp));
28207 gcc_unreachable ();
28211 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28212 instructions unless MMX_OK is true. */
28215 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28217 enum machine_mode mode = GET_MODE (target);
28218 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28219 int n_elts = GET_MODE_NUNITS (mode);
28220 int n_var = 0, one_var = -1;
28221 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count non-constant elements, remember the
   last one, and track all-same / all-zero.  */
28225 for (i = 0; i < n_elts; ++i)
28227 x = XVECEXP (vals, 0, i);
28228 if (!(CONST_INT_P (x)
28229 || GET_CODE (x) == CONST_DOUBLE
28230 || GET_CODE (x) == CONST_FIXED))
28231 n_var++, one_var = i;
28232 else if (x != CONST0_RTX (inner_mode))
28233 all_const_zero = false;
28234 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28238 /* Constants are best loaded from the constant pool. */
28241 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28245 /* If all values are identical, broadcast the value. */
28247 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28248 XVECEXP (vals, 0, 0)))
28251 /* Values where only one field is non-constant are best loaded from
28252 the pool and overwritten via move later. */
28256 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28257 XVECEXP (vals, 0, one_var),
28261 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* All strategies declined: fall back to the fully general expander.  */
28265 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX insns
   only if MMX_OK.  Falls back to a stack-temp spill/reload when no
   register strategy applies.  */
28269 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28271 enum machine_mode mode = GET_MODE (target);
28272 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28273 enum machine_mode half_mode;
28274 bool use_vec_merge = false;
/* Per-mode extract/insert generators for the lo/hi 128-bit halves of
   256-bit AVX vectors; indexed [mode][half].  */
28276 static rtx (*gen_extract[6][2]) (rtx, rtx)
28278 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28279 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28280 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28281 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28282 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28283 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28285 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28287 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28288 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28289 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28290 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28291 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28292 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the untouched element and re-concatenate.  */
28302 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28303 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28305 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28307 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28308 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28314 use_vec_merge = TARGET_SSE4_1;
28322 /* For the two element vectors, we implement a VEC_CONCAT with
28323 the extraction of the other element. */
28325 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28326 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28329 op0 = val, op1 = tmp;
28331 op0 = tmp, op1 = val;
28333 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28334 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28339 use_vec_merge = TARGET_SSE4_1;
28346 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle VAL into place via unpcklps/shufps.  */
28350 /* tmp = target = A B C D */
28351 tmp = copy_to_reg (target);
28352 /* target = A A B B */
28353 emit_insn (gen_sse_unpcklps (target, target, target));
28354 /* target = X A B B */
28355 ix86_expand_vector_set (false, target, val, 0);
28356 /* target = A X C D */
28357 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28358 GEN_INT (1), GEN_INT (0),
28359 GEN_INT (2+4), GEN_INT (3+4)));
28363 /* tmp = target = A B C D */
28364 tmp = copy_to_reg (target);
28365 /* tmp = X B C D */
28366 ix86_expand_vector_set (false, tmp, val, 0);
28367 /* target = A B X D */
28368 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28369 GEN_INT (0), GEN_INT (1),
28370 GEN_INT (0+4), GEN_INT (3+4)));
28374 /* tmp = target = A B C D */
28375 tmp = copy_to_reg (target);
28376 /* tmp = X B C D */
28377 ix86_expand_vector_set (false, tmp, val, 0);
28378 /* target = A B X D */
28379 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28380 GEN_INT (0), GEN_INT (1),
28381 GEN_INT (2+4), GEN_INT (0+4)));
28385 gcc_unreachable ();
28390 use_vec_merge = TARGET_SSE4_1;
28394 /* Element 0 handled by vec_merge below. */
28397 use_vec_merge = true;
28403 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28404 store into element 0, then shuffle them back. */
28408 order[0] = GEN_INT (elt);
28409 order[1] = const1_rtx;
28410 order[2] = const2_rtx;
28411 order[3] = GEN_INT (3);
28412 order[elt] = const0_rtx;
28414 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28415 order[1], order[2], order[3]));
28417 ix86_expand_vector_set (false, target, val, 0);
/* The same permutation is its own inverse, restoring element order.  */
28419 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28420 order[1], order[2], order[3]));
28424 /* For SSE1, we have to reuse the V4SF code. */
28425 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28426 gen_lowpart (SFmode, val), elt);
28431 use_vec_merge = TARGET_SSE2;
28434 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28438 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: operate on the 128-bit half containing ELT.  */
28445 half_mode = V16QImode;
28451 half_mode = V8HImode;
28457 half_mode = V4SImode;
28463 half_mode = V2DImode;
28469 half_mode = V4SFmode;
28475 half_mode = V2DFmode;
28481 /* Compute offset. */
28485 gcc_assert (i <= 1);
28487 /* Extract the half. */
28488 tmp = gen_reg_rtx (half_mode);
28489 emit_insn ((*gen_extract[j][i]) (tmp, target));
28491 /* Put val in tmp at elt. */
28492 ix86_expand_vector_set (false, tmp, val, elt);
/* Re-insert the modified half into TARGET.  */
28495 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* vec_merge path: duplicate VAL and merge one lane selected by ELT.  */
28504 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28505 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28506 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack temp, store the element, reload.  */
28510 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28512 emit_move_insn (mem, target);
28514 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28515 emit_move_insn (tmp, val);
28517 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET; MMX insns are
   allowed only when MMX_OK.  Falls back to a stack-temp reload.  */
28522 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28524 enum machine_mode mode = GET_MODE (vec);
28525 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28526 bool use_vec_extr = false;
28539 use_vec_extr = true;
28543 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted lane into position 0 first.  */
28555 tmp = gen_reg_rtx (mode);
28556 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28557 GEN_INT (elt), GEN_INT (elt),
28558 GEN_INT (elt+4), GEN_INT (elt+4)));
28562 tmp = gen_reg_rtx (mode);
28563 emit_insn (gen_sse_unpckhps (tmp, vec, vec))28563;
28567 gcc_unreachable ();
28570 use_vec_extr = true;
28575 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: broadcast the wanted lane via pshufd.  */
28589 tmp = gen_reg_rtx (mode);
28590 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28591 GEN_INT (elt), GEN_INT (elt),
28592 GEN_INT (elt), GEN_INT (elt)));
28596 tmp = gen_reg_rtx (mode);
28597 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28601 gcc_unreachable ();
28604 use_vec_extr = true;
28609 /* For SSE1, we have to reuse the V4SF code. */
28610 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28611 gen_lowpart (V4SFmode, vec), elt);
28617 use_vec_extr = TARGET_SSE2;
28620 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28624 use_vec_extr = TARGET_SSE4_1;
28628 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: select lane ELT directly.  */
28635 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28636 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28638 /* Let the rtl optimizers know about the zero extension performed. */
28639 if (inner_mode == QImode || inner_mode == HImode)
28641 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28642 target = gen_lowpart (SImode, target);
28645 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill VEC to memory and load the one element.  */
28649 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28651 emit_move_insn (mem, vec);
28653 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28654 emit_move_insn (target, tmp);
28658 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28659 pattern to reduce; DEST is the destination; IN is the input vector. */
28662 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28664 rtx tmp1, tmp2, tmp3;
28666 tmp1 = gen_reg_rtx (V4SFmode);
28667 tmp2 = gen_reg_rtx (V4SFmode);
28668 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair, then fold the remaining two
   lanes: movhlps + FN, shufps + FN.  */
28670 emit_insn (gen_sse_movhlps (tmp1, in, in));
28671 emit_insn (fn (tmp2, tmp1, in));
28673 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28674 GEN_INT (1), GEN_INT (1),
28675 GEN_INT (1+4), GEN_INT (1+4)));
28676 emit_insn (fn (dest, tmp2, tmp3));
28679 /* Target hook for scalar_mode_supported_p.  Decimal float modes and
TFmode get special answers (elided here); everything else defers to the
default hook.  */
28681 ix86_scalar_mode_supported_p (enum machine_mode mode)
28683 if (DECIMAL_FLOAT_MODE_P (mode))
28685 else if (mode == TFmode)
28688 return default_scalar_mode_supported_p (mode);
28691 /* Implements target hook vector_mode_supported_p.  A vector mode is
supported when the matching ISA extension is enabled.  */
28693 ix86_vector_mode_supported_p (enum machine_mode mode)
28695 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28697 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28699 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28701 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28703 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28708 /* Target hook for c_mode_for_suffix.  Maps a literal-suffix character
to a machine mode; the body is elided in this capture.  */
28709 static enum machine_mode
28710 ix86_c_mode_for_suffix (char suffix)
28720 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28722 We do this in the new i386 backend to maintain source compatibility
28723 with the old cc0-based compiler. */
28726 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28727 tree inputs ATTRIBUTE_UNUSED,
/* Implicitly clobber the condition flags and x87 status word for every
   asm statement.  */
28730 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28732 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28737 /* Implements target vector targetm.asm.encode_section_info. This
28738 is not used by netware. */
28740 static void ATTRIBUTE_UNUSED
28741 ix86_encode_section_info (tree decl, rtx rtl, int first)
28743 default_encode_section_info (decl, rtl, first);
/* Mark large-model data symbols so addressing code knows they need
   far addresses.  */
28745 if (TREE_CODE (decl) == VAR_DECL
28746 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28747 && ix86_in_large_data_p (decl))
28748 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28751 /* Worker function for REVERSE_CONDITION.  FP compare modes must use the
maybe-unordered reversal to keep NaN behavior correct.  */
28754 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28756 return (mode != CCFPmode && mode != CCFPUmode
28757 ? reverse_condition (code)
28758 : reverse_condition_maybe_unordered (code));
28761 /* Output code to perform an x87 FP register move, from OPERANDS[1]
to OPERANDS[0].  Returns the assembler template string; pops the source
when a REG_DEAD note shows it dies here.  */
28765 output_387_reg_move (rtx insn, rtx *operands)
28767 if (REG_P (operands[0]))
28769 if (REG_P (operands[1])
28770 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28772 if (REGNO (operands[0]) == FIRST_STACK_REG)
28773 return output_387_ffreep (operands, 0);
28774 return "fstp\t%y0";
28776 if (STACK_TOP_P (operands[0]))
28777 return "fld%z1\t%y1";
28780 else if (MEM_P (operands[0]))
28782 gcc_assert (REG_P (operands[1]));
28783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28784 return "fstp%z0\t%y0";
28787 /* There is no non-popping store to memory for XFmode.
28788 So if we need one, follow the store with a load. */
28789 if (GET_MODE (operands[0]) == XFmode)
28790 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28792 return "fst%z0\t%y0";
28799 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28800 FP status register is set. */
28803 ix86_emit_fp_unordered_jump (rtx label)
28805 rtx reg = gen_reg_rtx (HImode)28805;
/* Read the x87 status word into REG.  */
28808 emit_insn (gen_x86_fnstsw_1 (reg));
28810 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf path: load status into EFLAGS and test unordered.  */
28812 emit_insn (gen_x86_sahf_1 (reg));
28814 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28815 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test the C2 bit (0x04) directly.  */
28819 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28821 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28822 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28825 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28826 gen_rtx_LABEL_REF (VOIDmode, label),
28828 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28830 emit_jump_insn (temp);
28831 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28834 /* Output code to perform a log1p XFmode calculation. */
28836 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28838 rtx label1 = gen_label_rtx ();
28839 rtx label2 = gen_label_rtx ();
28841 rtx tmp = gen_reg_rtx (XFmode);
28842 rtx tmp2 = gen_reg_rtx (XFmode);
/* Compare |op1| against 1 - sqrt(2)/2 ~= 0.2928932...; small arguments
   take the fyl2xp1 path, which is more accurate near zero.  */
28844 emit_insn (gen_absxf2 (tmp, op1));
28845 emit_insn (gen_cmpxf (tmp,
28846 CONST_DOUBLE_FROM_REAL_VALUE (
28847 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28849 emit_jump_insn (gen_bge (label1));
28851 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28852 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28853 emit_jump (label2);
/* Large-argument path: compute ln2 * log2(1 + op1) via fyl2x.  */
28855 emit_label (label1);
28856 emit_move_insn (tmp, CONST1_RTX (XFmode));
28857 emit_insn (gen_addxf3 (tmp, op1, tmp));
28858 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28859 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28861 emit_label (label2);
28864 /* Output code to perform a Newton-Raphson approximation of a single precision
28865 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28867 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28869 rtx x0, x1, e0, e1, two;
28871 x0 = gen_reg_rtx (mode);
28872 e0 = gen_reg_rtx (mode);
28873 e1 = gen_reg_rtx (mode);
28874 x1 = gen_reg_rtx (mode);
28876 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28878 if (VECTOR_MODE_P (mode))
28879 two = ix86_build_const_vector (SFmode, true, two);
28881 two = force_reg (mode, two);
28883 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28885 /* x0 = rcp(b) estimate */
28886 emit_insn (gen_rtx_SET (VOIDmode, x0,
28887 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
28890 emit_insn (gen_rtx_SET (VOIDmode, e0,
28891 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28893 emit_insn (gen_rtx_SET (VOIDmode, e1,
28894 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal) */
28896 emit_insn (gen_rtx_SET (VOIDmode, x1,
28897 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28899 emit_insn (gen_rtx_SET (VOIDmode, res,
28900 gen_rtx_MULT (mode, a, x1)));
28903 /* Output code to perform a Newton-Raphson approximation of a
28904 single precision floating point [reciprocal] square root. */
28906 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28909 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28912 x0 = gen_reg_rtx (mode);
28913 e0 = gen_reg_rtx (mode);
28914 e1 = gen_reg_rtx (mode);
28915 e2 = gen_reg_rtx (mode);
28916 e3 = gen_reg_rtx (mode);
/* Constants -3.0 and -0.5 used by the refinement formula.  */
28918 real_from_integer (&r, VOIDmode, -3, -1, 0);
28919 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28921 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28922 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28924 if (VECTOR_MODE_P (mode))
28926 mthree = ix86_build_const_vector (SFmode, true, mthree);
28927 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28930 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28931 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28933 /* x0 = rsqrt(a) estimate */
28934 emit_insn (gen_rtx_SET (VOIDmode, x0,
28935 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28938 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28943 zero = gen_reg_rtx (mode);
28944 mask = gen_reg_rtx (mode);
/* mask = (a != 0); AND it into x0 so x0 becomes 0 where a == 0.  */
28946 zero = force_reg (mode, CONST0_RTX(mode));
28947 emit_insn (gen_rtx_SET (VOIDmode, mask,
28948 gen_rtx_NE (mode, zero, a)));
28950 emit_insn (gen_rtx_SET (VOIDmode, x0,
28951 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a; e1 = e0 * x0; e2 = e1 - 3.0 */
28955 emit_insn (gen_rtx_SET (VOIDmode, e0,
28956 gen_rtx_MULT (mode, x0, a)));
28958 emit_insn (gen_rtx_SET (VOIDmode, e1,
28959 gen_rtx_MULT (mode, e0, x0)));
28962 mthree = force_reg (mode, mthree);
28963 emit_insn (gen_rtx_SET (VOIDmode, e2,
28964 gen_rtx_PLUS (mode, e1, mthree)));
28966 mhalf = force_reg (mode, mhalf);
28968 /* e3 = -.5 * x0 */
28969 emit_insn (gen_rtx_SET (VOIDmode, e3,
28970 gen_rtx_MULT (mode, x0, mhalf)));
28972 /* e3 = -.5 * e0 */
28973 emit_insn (gen_rtx_SET (VOIDmode, e3,
28974 gen_rtx_MULT (mode, e0, mhalf)));
28975 /* ret = e2 * e3 */
28976 emit_insn (gen_rtx_SET (VOIDmode, res,
28977 gen_rtx_MULT (mode, e2, e3)));
28980 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28982 static void ATTRIBUTE_UNUSED
28983 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28986 /* With Binutils 2.15, the "@unwind" marker must be specified on
28987 every occurrence of the ".eh_frame" section, not just the first
28990 && strcmp (name, ".eh_frame") == 0)
28992 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28993 flags & SECTION_WRITE ? "aw" : "a");
/* Everything else uses the generic ELF section directive.  */
28996 default_elf_asm_named_section (name, flags, decl);
28999 /* Return the mangling of TYPE if it is an extended fundamental type. */
29001 static const char *
29002 ix86_mangle_type (const_tree type)
29004 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic/void/bool types can be extended fundamentals.  */
29006 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29007 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29010 switch (TYPE_MODE (type))
29013 /* __float128 is "g". */
29016 /* "long double" or __float80 is "e". */
29023 /* For 32-bit code we can save PIC register setup by using
29024 __stack_chk_fail_local hidden function instead of calling
29025 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29026 register, so it is better to call __stack_chk_fail directly. */
29029 ix86_stack_protect_fail (void)
29031 return TARGET_64BIT
29032 ? default_external_stack_protect_fail ()
29033 : default_hidden_stack_protect_fail ();
29036 /* Select a format to encode pointers in exception handling data. CODE
29037 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29038 true if the symbol may be affected by dynamic relocations.
29040 ??? All x86 object file formats are capable of representing this.
29041 After all, the relocation needed is the same as for the call insn.
29042 Whether or not a particular assembler allows us to enter such, I
29043 guess we'll have to see. */
29045 asm_preferred_eh_data_format (int code, int global)
/* PIC path (elided condition): pc-relative, sdata4 for small models.  */
29049 int type = DW_EH_PE_sdata8;
29051 || ix86_cmodel == CM_SMALL_PIC
29052 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29053 type = DW_EH_PE_sdata4;
29054 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers unless a small/medium code model allows
   4-byte unsigned data.  */
29056 if (ix86_cmodel == CM_SMALL
29057 || (ix86_cmodel == CM_MEDIUM && code))
29058 return DW_EH_PE_udata4;
29059 return DW_EH_PE_absptr;
29062 /* Expand copysign from SIGN to the positive value ABS_VALUE
29063 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
the sign bit (elided text).  */
29066 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29068 enum machine_mode mode = GET_MODE (sign);
29069 rtx sgn = gen_reg_rtx (mode);
29070 if (mask == NULL_RTX)
29072 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29073 if (!VECTOR_MODE_P (mode))
29075 /* We need to generate a scalar mode mask in this case. */
29076 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29077 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29078 mask = gen_reg_rtx (mode);
29079 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* sgn = SIGN & ~mask (keep only the sign bit), then OR into ABS_VALUE.  */
29083 mask = gen_rtx_NOT (mode, mask);
29084 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29085 gen_rtx_AND (mode, mask, sign)));
29086 emit_insn (gen_rtx_SET (VOIDmode, result,
29087 gen_rtx_IOR (mode, abs_value, sgn)));
29090 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29091 mask for masking out the sign-bit is stored in *SMASK, if that is
non-null (elided text).  */
29094 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29096 enum machine_mode mode = GET_MODE (op0);
29099 xa = gen_reg_rtx (mode);
29100 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29101 if (!VECTOR_MODE_P (mode))
29103 /* We need to generate a scalar mode mask in this case. */
29104 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29105 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29106 mask = gen_reg_rtx (mode);
29107 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask clears the sign bit, giving |op0|.  */
29109 emit_insn (gen_rtx_SET (VOIDmode, xa,
29110 gen_rtx_AND (mode, op0, mask)));
29118 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29119 swapping the operands if SWAP_OPERANDS is true. The expanded
29120 code is a forward jump to a newly created label in case the
29121 comparison is true. The generated label rtx is returned. */
29123 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29124 bool swap_operands)
/* Emit an unordered FP compare (CCFPUmode) and a conditional branch to
   a fresh label; the branch carries its JUMP_LABEL note.  */
29135 label = gen_label_rtx ();
29136 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29137 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29138 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29139 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29140 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29141 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29142 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29143 JUMP_LABEL (tmp) = label;
29148 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29149 using comparison code CODE. Operands are swapped for the comparison if
29150 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29152 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29153 bool swap_operands)
29155 enum machine_mode mode = GET_MODE (op0);
29156 rtx mask = gen_reg_rtx (mode);
/* cmpsd vs. cmpss depending on scalar mode.  */
29165 if (mode == DFmode)
29166 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29167 gen_rtx_fmt_ee (code, mode, op0, op1)));
29169 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29170 gen_rtx_fmt_ee (code, mode, op0, op1)));
29175 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29176 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29178 ix86_gen_TWO52 (enum machine_mode mode)
29180 REAL_VALUE_TYPE TWO52r;
/* 2**52 for DFmode, 2**23 for SFmode; forced into a register.  */
29183 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29184 TWO52 = const_double_from_real_value (TWO52r, mode);
29185 TWO52 = force_reg (mode, TWO52);
29190 /* Expand SSE sequence for computing lround from OP1 storing
/* NOTE(review): the "into OP0" tail of the comment, the return type and
   the declaration of ADJ are elided in this extract.  */
29193 ix86_expand_lround (rtx op0, rtx op1)
29195 /* C code for the stuff we're doing below:
29196 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29199 enum machine_mode mode = GET_MODE (op1);
29200 const struct real_format *fmt;
29201 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* Use nextafter (0.5, 0.0) instead of 0.5 so values exactly halfway
   between integers round correctly after the later truncation.  */
29204 /* load nextafter (0.5, 0.0) */
29205 fmt = REAL_MODE_FORMAT (mode);
29206 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29207 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29209 /* adj = copysign (0.5, op1) */
29210 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29211 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29213 /* adj = op1 + adj */
29214 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
/* Truncate toward zero; the pre-added half turns this into round.  */
29216 /* op0 = (imode)adj */
29217 expand_fix (op0, adj, 0);
29220 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* NOTE(review): comment tail and some body lines are elided here; the
   visible code computes lfloor/lceil, not lround, despite the comment.  */
29223 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29225 /* C code for the stuff we're doing below (for do_floor):
29227 xi -= (double)xi > op1 ? 1 : 0;
29230 enum machine_mode fmode = GET_MODE (op1);
29231 enum machine_mode imode = GET_MODE (op0);
29232 rtx ireg, freg, label, tmp;
/* Truncate to integer first ...  */
29234 /* reg = (long)op1 */
29235 ireg = gen_reg_rtx (imode);
29236 expand_fix (ireg, op1, 0);
/* ... then convert back to compare against the original.  */
29238 /* freg = (double)reg */
29239 freg = gen_reg_rtx (fmode);
29240 expand_float (freg, ireg, 0);
/* If truncation moved the value the wrong way, compensate by one.
   For ceil the comparison operands are swapped (!do_floor) and the
   adjustment is PLUS instead of MINUS.  */
29242 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29243 label = ix86_expand_sse_compare_and_jump (UNLE,
29244 freg, op1, !do_floor);
29245 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29246 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29247 emit_move_insn (ireg, tmp);
29249 emit_label (label);
29250 LABEL_NUSES (label) = 1;
29252 emit_move_insn (op0, ireg);
29255 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29256 result in OPERAND0. */
29258 ix86_expand_rint (rtx operand0, rtx operand1)
29260 /* C code for the stuff we're doing below:
29261 xa = fabs (operand1);
29262 if (!isless (xa, 2**52))
29264 xa = xa + 2**52 - 2**52;
29265 return copysign (xa, operand1);
29267 enum machine_mode mode = GET_MODE (operand0);
29268 rtx res, xa, label, TWO52, mask;
/* res starts as the input so the "already an integer" fast path
   (the taken branch below) needs no extra move.  */
29270 res = gen_reg_rtx (mode);
29271 emit_move_insn (res, operand1);
29273 /* xa = abs (operand1) */
29274 xa = ix86_expand_sse_fabs (res, &mask);
/* Values >= 2**52 (2**23 for SF) are already integral -- skip.  */
29276 /* if (!isless (xa, TWO52)) goto label; */
29277 TWO52 = ix86_gen_TWO52 (mode);
29278 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**mantissa-bits rounds to integer in the
   current rounding mode.  */
29280 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29281 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (keeps -0.0 -> -0.0 correct).  */
29283 ix86_sse_copysign_to_positive (res, xa, res, mask);
29285 emit_label (label);
29286 LABEL_NUSES (label) = 1;
29288 emit_move_insn (operand0, res);
29291 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* NOTE(review): works without DImode truncation, hence usable on 32-bit
   targets (the "_32" suffix); several comment lines are elided here.  */
29294 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29296 /* C code for the stuff we expand below.
29297 double xa = fabs (x), x2;
29298 if (!isless (xa, TWO52))
29300 xa = xa + TWO52 - TWO52;
29301 x2 = copysign (xa, x);
29310 enum machine_mode mode = GET_MODE (operand0);
29311 rtx xa, TWO52, tmp, label, one, res, mask;
29313 TWO52 = ix86_gen_TWO52 (mode);
29315 /* Temporary for holding the result, initialized to the input
29316 operand to ease control flow. */
29317 res = gen_reg_rtx (mode);
29318 emit_move_insn (res, operand1);
29320 /* xa = abs (operand1) */
29321 xa = ix86_expand_sse_fabs (res, &mask);
29323 /* if (!isless (xa, TWO52)) goto label; */
29324 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round to nearest integer via the 2**52 add/subtract trick.  */
29326 /* xa = xa + TWO52 - TWO52; */
29327 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29328 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29330 /* xa = copysign (xa, operand1) */
29331 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* Adjustment constant: +1.0 for floor, -1.0 for ceil.  */
29333 /* generate 1.0 or -1.0 */
29334 one = force_reg (mode,
29335 const_double_from_real_value (do_floor
29336 ? dconst1 : dconstm1, mode));
29338 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29339 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
/* AND the all-ones/all-zeros mask with +-1.0 to get the adjustment.  */
29340 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29341 gen_rtx_AND (mode, one, tmp)));
29342 /* We always need to subtract here to preserve signed zero. */
29343 tmp = expand_simple_binop (mode, MINUS,
29344 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29345 emit_move_insn (res, tmp);
29347 emit_label (label);
29348 LABEL_NUSES (label) = 1;
29350 emit_move_insn (operand0, res);
29353 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* NOTE(review): this variant relies on fix/float truncation (cvttsd2siq
   for DFmode needs a 64-bit target); comment tail elided here.  */
29356 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29358 /* C code for the stuff we expand below.
29359 double xa = fabs (x), x2;
29360 if (!isless (xa, TWO52))
29362 x2 = (double)(long)x;
29369 if (HONOR_SIGNED_ZEROS (mode))
29370 return copysign (x2, x);
29373 enum machine_mode mode = GET_MODE (operand0);
29374 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29376 TWO52 = ix86_gen_TWO52 (mode);
29378 /* Temporary for holding the result, initialized to the input
29379 operand to ease control flow. */
29380 res = gen_reg_rtx (mode);
29381 emit_move_insn (res, operand1);
29383 /* xa = abs (operand1) */
29384 xa = ix86_expand_sse_fabs (res, &mask);
29386 /* if (!isless (xa, TWO52)) goto label; */
29387 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Truncate toward zero, then convert back for the compensation test.  */
29389 /* xa = (double)(long)x */
29390 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29391 expand_fix (xi, res, 0);
29392 expand_float (xa, xi, 0);
29395 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29397 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29398 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29399 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29400 gen_rtx_AND (mode, one, tmp)));
/* floor subtracts the correction, ceil adds it.  */
29401 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29402 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29403 emit_move_insn (res, tmp);
/* Restore sign only when -0.0 must be preserved.  */
29405 if (HONOR_SIGNED_ZEROS (mode))
29406 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29408 emit_label (label);
29409 LABEL_NUSES (label) = 1;
29411 emit_move_insn (operand0, res);
29414 /* Expand SSE sequence for computing round from OPERAND1 storing
29415 into OPERAND0. Sequence that works without relying on DImode truncation
29416 via cvttsd2siq that is only available on 64bit targets. */
29418 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29420 /* C code for the stuff we expand below.
29421 double xa = fabs (x), xa2, x2;
29422 if (!isless (xa, TWO52))
29424 Using the absolute value and copying back sign makes
29425 -0.0 -> -0.0 correct.
29426 xa2 = xa + TWO52 - TWO52;
29431 else if (dxa > 0.5)
29433 x2 = copysign (xa2, x);
29436 enum machine_mode mode = GET_MODE (operand0);
29437 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29439 TWO52 = ix86_gen_TWO52 (mode);
29441 /* Temporary for holding the result, initialized to the input
29442 operand to ease control flow. */
29443 res = gen_reg_rtx (mode);
29444 emit_move_insn (res, operand1);
29446 /* xa = abs (operand1) */
29447 xa = ix86_expand_sse_fabs (res, &mask);
29449 /* if (!isless (xa, TWO52)) goto label; */
29450 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29452 /* xa2 = xa + TWO52 - TWO52; */
29453 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29454 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29456 /* dxa = xa2 - xa; */
29457 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29459 /* generate 0.5, 1.0 and -0.5 */
29460 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29461 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29462 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29466 tmp = gen_reg_rtx (mode);
29467 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29468 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29469 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29470 gen_rtx_AND (mode, one, tmp)));
29471 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29472 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29473 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29474 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29475 gen_rtx_AND (mode, one, tmp)));
29476 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29478 /* res = copysign (xa2, operand1) */
29479 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29481 emit_label (label);
29482 LABEL_NUSES (label) = 1;
29484 emit_move_insn (operand0, res);
29487 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): comment tail elided; this variant uses fix/float
   truncation, so DFmode requires a 64-bit target for cvttsd2siq.  */
29490 ix86_expand_trunc (rtx operand0, rtx operand1)
29492 /* C code for SSE variant we expand below.
29493 double xa = fabs (x), x2;
29494 if (!isless (xa, TWO52))
29496 x2 = (double)(long)x;
29497 if (HONOR_SIGNED_ZEROS (mode))
29498 return copysign (x2, x);
29501 enum machine_mode mode = GET_MODE (operand0);
29502 rtx xa, xi, TWO52, label, res, mask;
29504 TWO52 = ix86_gen_TWO52 (mode);
29506 /* Temporary for holding the result, initialized to the input
29507 operand to ease control flow. */
29508 res = gen_reg_rtx (mode);
29509 emit_move_insn (res, operand1);
29511 /* xa = abs (operand1) */
29512 xa = ix86_expand_sse_fabs (res, &mask);
29514 /* if (!isless (xa, TWO52)) goto label; */
29515 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round-trip through the integer mode to truncate toward zero.  */
29517 /* x = (double)(long)x */
29518 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29519 expand_fix (xi, res, 0);
29520 expand_float (res, xi, 0);
/* Restore sign only when -0.0 must be preserved.  */
29522 if (HONOR_SIGNED_ZEROS (mode))
29523 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29525 emit_label (label);
29526 LABEL_NUSES (label) = 1;
29528 emit_move_insn (operand0, res);
29531 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* NOTE(review): 32-bit-safe variant -- avoids DImode truncation by using
   the TWO52 trick plus compensation; comment tail elided here.  */
29534 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29536 enum machine_mode mode = GET_MODE (operand0);
29537 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29539 /* C code for SSE variant we expand below.
29540 double xa = fabs (x), x2;
29541 if (!isless (xa, TWO52))
29543 xa2 = xa + TWO52 - TWO52;
29547 x2 = copysign (xa2, x);
29551 TWO52 = ix86_gen_TWO52 (mode);
29553 /* Temporary for holding the result, initialized to the input
29554 operand to ease control flow. */
29555 res = gen_reg_rtx (mode);
29556 emit_move_insn (res, operand1);
29558 /* xa = abs (operand1) */
29559 xa = ix86_expand_sse_fabs (res, &smask);
29561 /* if (!isless (xa, TWO52)) goto label; */
29562 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round to nearest integer via 2**52 add/subtract.  */
29564 /* res = xa + TWO52 - TWO52; */
29565 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29566 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29567 emit_move_insn (res, tmp);
29570 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* If rounding went up, subtract 1 so the result truncates toward zero.  */
29572 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29573 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29574 emit_insn (gen_rtx_SET (VOIDmode, mask,
29575 gen_rtx_AND (mode, mask, one)));
29576 tmp = expand_simple_binop (mode, MINUS,
29577 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29578 emit_move_insn (res, tmp);
29580 /* res = copysign (res, operand1) */
29581 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29583 emit_label (label);
29584 LABEL_NUSES (label) = 1;
29586 emit_move_insn (operand0, res);
29589 /* Expand SSE sequence for computing round from OPERAND1 storing
/* NOTE(review): comment tail elided; this variant needs DImode
   truncation for DFmode (64-bit targets).  */
29592 ix86_expand_round (rtx operand0, rtx operand1)
29594 /* C code for the stuff we're doing below:
29595 double xa = fabs (x);
29596 if (!isless (xa, TWO52))
29598 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29599 return copysign (xa, x);
29601 enum machine_mode mode = GET_MODE (operand0);
29602 rtx res, TWO52, xa, label, xi, half, mask;
29603 const struct real_format *fmt;
29604 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29606 /* Temporary for holding the result, initialized to the input
29607 operand to ease control flow. */
29608 res = gen_reg_rtx (mode);
29609 emit_move_insn (res, operand1);
29611 TWO52 = ix86_gen_TWO52 (mode);
29612 xa = ix86_expand_sse_fabs (res, &mask);
29613 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* nextafter (0.5, 0.0) = 0.5 - 2**(-p-1); avoids rounding exact-half
   values the wrong way after truncation.  */
29615 /* load nextafter (0.5, 0.0) */
29616 fmt = REAL_MODE_FORMAT (mode);
29617 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29618 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29620 /* xa = xa + 0.5 */
29621 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29622 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29624 /* xa = (double)(int64_t)xa */
29625 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29626 expand_fix (xi, xa, 0);
29627 expand_float (xa, xi, 0);
29629 /* res = copysign (xa, operand1) */
29630 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29632 emit_label (label);
29633 LABEL_NUSES (label) = 1;
29635 emit_move_insn (operand0, res);
29639 /* Validate whether a SSE5 instruction is valid or not.
29640 OPERANDS is the array of operands.
29641 NUM is the number of operands.
29642 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29643 NUM_MEMORY is the maximum number of memory operands to accept.
29644 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29647 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29648 bool uses_oc0, int num_memory, bool commutative)
29654 /* Count the number of memory arguments */
29657 for (i = 0; i < num; i++)
29659 enum machine_mode mode = GET_MODE (operands[i]);
29660 if (register_operand (operands[i], mode))
29663 else if (memory_operand (operands[i], mode))
29665 mem_mask |= (1 << i);
29671 rtx pattern = PATTERN (insn);
29673 /* allow 0 for pcmov */
29674 if (GET_CODE (pattern) != SET
29675 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29677 || operands[i] != CONST0_RTX (mode))
29682 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29683 a memory operation. */
29684 if (num_memory < 0)
29686 num_memory = -num_memory;
29687 if ((mem_mask & (1 << (num-1))) != 0)
29689 mem_mask &= ~(1 << (num-1));
29694 /* If there were no memory operations, allow the insn */
29698 /* Do not allow the destination register to be a memory operand. */
29699 else if (mem_mask & (1 << 0))
29702 /* If there are too many memory operations, disallow the instruction. While
29703 the hardware only allows 1 memory reference, before register allocation
29704 for some insns, we allow two memory operations sometimes in order to allow
29705 code like the following to be optimized:
29707 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29709 or similar cases that are vectorized into using the fmaddss
29711 else if (mem_count > num_memory)
29714 /* Don't allow more than one memory operation if not optimizing. */
29715 else if (mem_count > 1 && !optimize)
29718 else if (num == 4 && mem_count == 1)
29720 /* formats (destination is the first argument), example fmaddss:
29721 xmm1, xmm1, xmm2, xmm3/mem
29722 xmm1, xmm1, xmm2/mem, xmm3
29723 xmm1, xmm2, xmm3/mem, xmm1
29724 xmm1, xmm2/mem, xmm3, xmm1 */
29726 return ((mem_mask == (1 << 1))
29727 || (mem_mask == (1 << 2))
29728 || (mem_mask == (1 << 3)));
29730 /* format, example pmacsdd:
29731 xmm1, xmm2, xmm3/mem, xmm1 */
29733 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29735 return (mem_mask == (1 << 2));
29738 else if (num == 4 && num_memory == 2)
29740 /* If there are two memory operations, we can load one of the memory ops
29741 into the destination register. This is for optimizing the
29742 multiply/add ops, which the combiner has optimized both the multiply
29743 and the add insns to have a memory operation. We have to be careful
29744 that the destination doesn't overlap with the inputs. */
29745 rtx op0 = operands[0];
29747 if (reg_mentioned_p (op0, operands[1])
29748 || reg_mentioned_p (op0, operands[2])
29749 || reg_mentioned_p (op0, operands[3]))
29752 /* formats (destination is the first argument), example fmaddss:
29753 xmm1, xmm1, xmm2, xmm3/mem
29754 xmm1, xmm1, xmm2/mem, xmm3
29755 xmm1, xmm2, xmm3/mem, xmm1
29756 xmm1, xmm2/mem, xmm3, xmm1
29758 For the oc0 case, we will load either operands[1] or operands[3] into
29759 operands[0], so any combination of 2 memory operands is ok. */
29763 /* format, example pmacsdd:
29764 xmm1, xmm2, xmm3/mem, xmm1
29766 For the integer multiply/add instructions be more restrictive and
29767 require operands[2] and operands[3] to be the memory operands. */
29769 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29771 return (mem_mask == ((1 << 2) | (1 << 3)));
29774 else if (num == 3 && num_memory == 1)
29776 /* formats, example protb:
29777 xmm1, xmm2, xmm3/mem
29778 xmm1, xmm2/mem, xmm3 */
29780 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29782 /* format, example comeq:
29783 xmm1, xmm2, xmm3/mem */
29785 return (mem_mask == (1 << 2));
29789 gcc_unreachable ();
29795 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29796 hardware will allow by using the destination register to load one of the
29797 memory operations. Presently this is used by the multiply/add routines to
29798 allow 2 memory references. */
/* NOTE(review): the NUM parameter, the opening condition of the sanity
   check (line 29806) and the assignments replacing operands[1]/[3] are
   elided in this extract.  */
29801 ix86_expand_sse5_multiple_memory (rtx operands[],
29803 enum machine_mode mode)
29805 rtx op0 = operands[0];
/* The destination must be a register that does not overlap any input,
   since we clobber it with one of the memory operands.  */
29807 || memory_operand (op0, mode)
29808 || reg_mentioned_p (op0, operands[1])
29809 || reg_mentioned_p (op0, operands[2])
29810 || reg_mentioned_p (op0, operands[3]))
29811 gcc_unreachable ();
29813 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29814 the destination register. */
29815 if (memory_operand (operands[1], mode))
29817 emit_move_insn (op0, operands[1]);
29820 else if (memory_operand (operands[3], mode))
29822 emit_move_insn (op0, operands[3]);
/* Caller guaranteed one of operands[1]/[3] is a MEM; anything else is
   a logic error upstream.  */
29826 gcc_unreachable ();
29832 /* Table of valid machine attributes. */
/* NOTE(review): the opening brace of the initializer and several #endif
   lines are elided in this extract.  */
29833 static const struct attribute_spec ix86_attribute_table[] =
29835 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29836 /* Stdcall attribute says callee is responsible for popping arguments
29837 if they are not variable. */
29838 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29839 /* Fastcall attribute says callee is responsible for popping arguments
29840 if they are not variable. */
29841 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29842 /* Cdecl attribute says the callee is a normal C declaration */
29843 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29844 /* Regparm attribute specifies how many integer arguments are to be
29845 passed in registers. */
29846 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29847 /* Sseregparm attribute says we are using x86_64 calling conventions
29848 for FP arguments. */
29849 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29850 /* force_align_arg_pointer says this function realigns the stack at entry. */
29851 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29852 false, true, true, ix86_handle_cconv_attribute },
29853 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29854 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29855 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29856 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the struct-layout convention.  */
29858 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29859 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29860 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29861 SUBTARGET_ATTRIBUTE_TABLE,
29863 /* ms_abi and sysv_abi calling convention function attributes. */
29864 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29865 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry terminating the table.  */
29867 { NULL, 0, 0, false, false, false, NULL }
29870 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): the return type, the "if (runtime_test)" guard and the
   non-runtime-test return are elided in this extract.  */
29872 x86_builtin_vectorization_cost (bool runtime_test)
29874 /* If the branch of the runtime test is taken - i.e. - the vectorized
29875 version is skipped - this incurs a misprediction cost (because the
29876 vectorized version is expected to be the fall-through). So we subtract
29877 the latency of a mispredicted branch from the costs that are incurred
29878 when the vectorized version is executed.
29880 TODO: The values in individual target tables have to be tuned or new
29881 fields may be needed. For eg. on K8, the default branch path is the
29882 not-taken path. If the taken path is predicted correctly, the minimum
29883 penalty of going down the taken-path is 1 cycle. If the taken-path is
29884 not predicted correctly, then the minimum penalty is 10 cycles. */
29888 return (-(ix86_cost->cond_taken_branch_cost));
29894 /* This function returns the calling abi specific va_list type node.
29895 It returns the FNDECL specific va_list type. */
/* NOTE(review): the return type and the guard condition preceding the
   first return (line 29900, presumably "if (!TARGET_64BIT)") are elided
   in this extract -- confirm against upstream.  */
29898 ix86_fn_abi_va_list (tree fndecl)
29901 return va_list_type_node;
29902 gcc_assert (fndecl != NULL_TREE);
/* On 64-bit, MS-ABI functions get the MS va_list; everything else the
   SysV one.  */
29904 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29905 return ms_va_list_type_node;
29907 return sysv_va_list_type_node;
29910 /* Returns the canonical va_list type specified by TYPE. If there
29911 is no valid TYPE provided, it return NULL_TREE. */
/* NOTE(review): the return type, the declarations of wtype/htype and
   the initial "htype = type;" appear to be elided in this extract.
   The function matches TYPE against the generic, SysV and MS va_list
   nodes in turn; the three unwrap blocks are intentionally parallel.  */
29914 ix86_canonical_va_list_type (tree type)
29918 /* Resolve references and pointers to va_list type. */
29919 if (INDIRECT_REF_P (type))
29920 type = TREE_TYPE (type);
29921 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29922 type = TREE_TYPE (type);
/* First candidate: the generic __builtin_va_list.  */
29926 wtype = va_list_type_node;
29927 gcc_assert (wtype != NULL_TREE);
29929 if (TREE_CODE (wtype) == ARRAY_TYPE)
29931 /* If va_list is an array type, the argument may have decayed
29932 to a pointer type, e.g. by being passed to another function.
29933 In that case, unwrap both types so that we can compare the
29934 underlying records. */
29935 if (TREE_CODE (htype) == ARRAY_TYPE
29936 || POINTER_TYPE_P (htype))
29938 wtype = TREE_TYPE (wtype);
29939 htype = TREE_TYPE (htype);
29942 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29943 return va_list_type_node;
/* Second candidate: the SysV (x86-64) va_list.  */
29944 wtype = sysv_va_list_type_node;
29945 gcc_assert (wtype != NULL_TREE);
29947 if (TREE_CODE (wtype) == ARRAY_TYPE)
29949 /* If va_list is an array type, the argument may have decayed
29950 to a pointer type, e.g. by being passed to another function.
29951 In that case, unwrap both types so that we can compare the
29952 underlying records. */
29953 if (TREE_CODE (htype) == ARRAY_TYPE
29954 || POINTER_TYPE_P (htype))
29956 wtype = TREE_TYPE (wtype);
29957 htype = TREE_TYPE (htype);
29960 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29961 return sysv_va_list_type_node;
/* Third candidate: the Microsoft-ABI va_list.  */
29962 wtype = ms_va_list_type_node;
29963 gcc_assert (wtype != NULL_TREE);
29965 if (TREE_CODE (wtype) == ARRAY_TYPE)
29967 /* If va_list is an array type, the argument may have decayed
29968 to a pointer type, e.g. by being passed to another function.
29969 In that case, unwrap both types so that we can compare the
29970 underlying records. */
29971 if (TREE_CODE (htype) == ARRAY_TYPE
29972 || POINTER_TYPE_P (htype))
29974 wtype = TREE_TYPE (wtype);
29975 htype = TREE_TYPE (htype);
29978 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29979 return ms_va_list_type_node;
/* No target-specific match: fall back to the generic handling.  */
29982 return std_canonical_va_list_type (type);
29985 /* Iterate through the target-specific builtin types for va_list.
29986 IDX denotes the iterator, *PTREE is set to the result type of
29987 the va_list builtin, and *PNAME to its internal type.
29988 Returns zero if there is no element for this index, otherwise
29989 IDX should be increased upon the next call.
29990 Note, do not iterate a base builtin's name like __builtin_va_list.
29991 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the return type, the switch/case dispatch on IDX and
   the return statements are elided in this extract.  */
29994 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* idx == 0: the MS-ABI va_list.  */
30000 *ptree = ms_va_list_type_node;
30001 *pname = "__builtin_ms_va_list";
/* idx == 1: the SysV va_list.  */
30004 *ptree = sysv_va_list_type_node;
30005 *pname = "__builtin_sysv_va_list";
30013 /* Initialize the GCC target structure. */
30014 #undef TARGET_RETURN_IN_MEMORY
30015 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30017 #undef TARGET_ATTRIBUTE_TABLE
30018 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30019 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30020 # undef TARGET_MERGE_DECL_ATTRIBUTES
30021 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30024 #undef TARGET_COMP_TYPE_ATTRIBUTES
30025 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30027 #undef TARGET_INIT_BUILTINS
30028 #define TARGET_INIT_BUILTINS ix86_init_builtins
30029 #undef TARGET_EXPAND_BUILTIN
30030 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30032 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30033 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30034 ix86_builtin_vectorized_function
30036 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30037 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30039 #undef TARGET_BUILTIN_RECIPROCAL
30040 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30042 #undef TARGET_ASM_FUNCTION_EPILOGUE
30043 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30045 #undef TARGET_ENCODE_SECTION_INFO
30046 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30047 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30049 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30052 #undef TARGET_ASM_OPEN_PAREN
30053 #define TARGET_ASM_OPEN_PAREN ""
30054 #undef TARGET_ASM_CLOSE_PAREN
30055 #define TARGET_ASM_CLOSE_PAREN ""
30057 #undef TARGET_ASM_ALIGNED_HI_OP
30058 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30059 #undef TARGET_ASM_ALIGNED_SI_OP
30060 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30062 #undef TARGET_ASM_ALIGNED_DI_OP
30063 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30066 #undef TARGET_ASM_UNALIGNED_HI_OP
30067 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30068 #undef TARGET_ASM_UNALIGNED_SI_OP
30069 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30070 #undef TARGET_ASM_UNALIGNED_DI_OP
30071 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30073 #undef TARGET_SCHED_ADJUST_COST
30074 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30075 #undef TARGET_SCHED_ISSUE_RATE
30076 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30077 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30078 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30079 ia32_multipass_dfa_lookahead
30081 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30082 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30085 #undef TARGET_HAVE_TLS
30086 #define TARGET_HAVE_TLS true
30088 #undef TARGET_CANNOT_FORCE_CONST_MEM
30089 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30090 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30091 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30093 #undef TARGET_DELEGITIMIZE_ADDRESS
30094 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30096 #undef TARGET_MS_BITFIELD_LAYOUT_P
30097 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30100 #undef TARGET_BINDS_LOCAL_P
30101 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30103 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30104 #undef TARGET_BINDS_LOCAL_P
30105 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30108 #undef TARGET_ASM_OUTPUT_MI_THUNK
30109 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30110 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30111 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30113 #undef TARGET_ASM_FILE_START
30114 #define TARGET_ASM_FILE_START x86_file_start
30116 #undef TARGET_DEFAULT_TARGET_FLAGS
30117 #define TARGET_DEFAULT_TARGET_FLAGS \
30119 | TARGET_SUBTARGET_DEFAULT \
30120 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30122 #undef TARGET_HANDLE_OPTION
30123 #define TARGET_HANDLE_OPTION ix86_handle_option
30125 #undef TARGET_RTX_COSTS
30126 #define TARGET_RTX_COSTS ix86_rtx_costs
30127 #undef TARGET_ADDRESS_COST
30128 #define TARGET_ADDRESS_COST ix86_address_cost
30130 #undef TARGET_FIXED_CONDITION_CODE_REGS
30131 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30132 #undef TARGET_CC_MODES_COMPATIBLE
30133 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30135 #undef TARGET_MACHINE_DEPENDENT_REORG
30136 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30138 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30139 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30141 #undef TARGET_BUILD_BUILTIN_VA_LIST
30142 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30144 #undef TARGET_FN_ABI_VA_LIST
30145 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30147 #undef TARGET_CANONICAL_VA_LIST_TYPE
30148 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30150 #undef TARGET_EXPAND_BUILTIN_VA_START
30151 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30153 #undef TARGET_MD_ASM_CLOBBERS
30154 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30156 #undef TARGET_PROMOTE_PROTOTYPES
30157 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30158 #undef TARGET_STRUCT_VALUE_RTX
30159 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30160 #undef TARGET_SETUP_INCOMING_VARARGS
30161 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30162 #undef TARGET_MUST_PASS_IN_STACK
30163 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30164 #undef TARGET_PASS_BY_REFERENCE
30165 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30166 #undef TARGET_INTERNAL_ARG_POINTER
30167 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30168 #undef TARGET_UPDATE_STACK_BOUNDARY
30169 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30170 #undef TARGET_GET_DRAP_RTX
30171 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30172 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30173 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30174 #undef TARGET_STRICT_ARGUMENT_NAMING
30175 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30177 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30178 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30180 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30181 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30183 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30184 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30186 #undef TARGET_C_MODE_FOR_SUFFIX
30187 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30190 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30191 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30194 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30195 #undef TARGET_INSERT_ATTRIBUTES
30196 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30199 #undef TARGET_MANGLE_TYPE
30200 #define TARGET_MANGLE_TYPE ix86_mangle_type
30202 #undef TARGET_STACK_PROTECT_FAIL
30203 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30205 #undef TARGET_FUNCTION_VALUE
30206 #define TARGET_FUNCTION_VALUE ix86_function_value
30208 #undef TARGET_SECONDARY_RELOAD
30209 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30211 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30212 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30214 #undef TARGET_SET_CURRENT_FUNCTION
30215 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30217 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30218 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30220 #undef TARGET_OPTION_SAVE
30221 #define TARGET_OPTION_SAVE ix86_function_specific_save
30223 #undef TARGET_OPTION_RESTORE
30224 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30226 #undef TARGET_OPTION_PRINT
30227 #define TARGET_OPTION_PRINT ix86_function_specific_print
30229 #undef TARGET_OPTION_CAN_INLINE_P
30230 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30232 #undef TARGET_EXPAND_TO_RTL_HOOK
30233 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* Instantiate the single target hook vector from all the TARGET_*
   macros (re)defined above.  */
30235 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30237 #include "gt-i386.h"