1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219 negatively, so enabling for Generic64 seems like good code size
1220 tradeoff. We can't enable it for 32bit generic because it does not
1221 work well with PPro base chips. */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 /* X86_TUNE_USE_BIT_TEST */
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1240 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241 on simulation result. But after P4 was made, no performance benefit
1242 was observed with branch hints. It also increases the code size.
1243 As a result, icc never generates branch hints. */
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1258 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259 register stalls on Generic32 compilation setting as well. However
1260 in current implementation the partial register stalls are not eliminated
1261 very well - they can be introduced via subregs synthesized by combine
1262 and can happen in caller/callee saving sequences. Because this option
1263 pays back little on PPro based chips and is in conflict with partial reg
1264 dependencies used by Athlon/P4 based chips, it is better to leave it off
1265 for generic32 for now. */
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_MOV0 */
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1292 /* X86_TUNE_READ_MODIFY */
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1305 /* X86_TUNE_QIMODE_MATH */
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1342 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343 conflict here in between PPro/Pentium4 based chips that thread 128bit
1344 SSE registers as single units versus K8 based chips that divide SSE
1345 registers to two 64bit halves. This knob promotes all store destinations
1346 to be 128bit to allow register renaming on 128bit SSE units, but usually
1347 results in one extra microop on 64bit SSE units. Experimental results
1348 shows that disabling this option on P4 brings over 20% SPECfp regression,
1349 while enabling it on K8 brings roughly 2.4% regression that can be partly
1350 masked by careful scheduling of moves. */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1356 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357 are resolved on SSE register parts instead of whole registers, so we may
1358 maintain just lower part of scalar values in proper format leaving the
1359 upper part undefined. */
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1377 /* X86_TUNE_SHIFT1 */
1380 /* X86_TUNE_USE_FFREEP */
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1414 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1415 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
1478 static enum stringop_alg stringop_alg = no_stringop;
1480 /* In case the average insn count for single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1485 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 /* The "default" register map used in 32bit mode. */
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1533 static int const x86_64_int_parameter_registers[6] =
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1545 static int const x86_64_int_return_registers[4] =
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
1632 rtx ix86_compare_emitted = NULL_RTX;
1634 /* Size of the register save area. */
1635 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1697 /* Which cpu are we scheduling for. */
1698 enum processor_type ix86_tune;
1700 /* Which instruction set architecture to use. */
1701 enum processor_type ix86_arch;
1703 /* true if sse prefetch instruction is not NOOP. */
1704 int x86_prefetch_sse;
1706 /* ix86_regparm_string as a number */
1707 static int ix86_regparm;
1709 /* -mstackrealign option */
1710 extern int ix86_force_align_arg_pointer;
1711 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1713 static rtx (*ix86_gen_leave) (void);
1714 static rtx (*ix86_gen_pop1) (rtx);
1715 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1716 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1717 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1718 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1719 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1721 /* Preferred alignment for stack boundary in bits. */
1722 unsigned int ix86_preferred_stack_boundary;
1724 /* Values 1-5: see jump.c */
1725 int ix86_branch_cost;
1727 /* Calling abi specific va_list type nodes. */
1728 static GTY(()) tree sysv_va_list_type_node;
1729 static GTY(()) tree ms_va_list_type_node;
1731 /* Variables which are this size or smaller are put in the data/bss
1732 or ldata/lbss sections. */
1734 int ix86_section_threshold = 65536;
1736 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1737 char internal_label_prefix[16];
1738 int internal_label_prefix_len;
1740 /* Fence to use after loop using movnt. */
1743 /* Register class used for passing given 64bit part of the argument.
1744 These represent classes as documented by the PS ABI, with the exception
1745 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1746 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1748 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1749 whenever possible (upper half does contain padding). */
1750 enum x86_64_reg_class
1753 X86_64_INTEGER_CLASS,
1754 X86_64_INTEGERSI_CLASS,
1761 X86_64_COMPLEX_X87_CLASS,
1764 static const char * const x86_64_reg_class_name[] =
1766 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1767 "sseup", "x87", "x87up", "cplx87", "no"
1770 #define MAX_CLASSES 4
1772 /* Table of constants used by fldpi, fldln2, etc.... */
1773 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1774 static bool ext_80387_constants_init = 0;
1777 static struct machine_function * ix86_init_machine_status (void);
1778 static rtx ix86_function_value (const_tree, const_tree, bool);
1779 static int ix86_function_regparm (const_tree, const_tree);
1780 static void ix86_compute_frame_layout (struct ix86_frame *);
1781 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1784 enum ix86_function_specific_strings
1786 IX86_FUNCTION_SPECIFIC_ARCH,
1787 IX86_FUNCTION_SPECIFIC_TUNE,
1788 IX86_FUNCTION_SPECIFIC_FPMATH,
1789 IX86_FUNCTION_SPECIFIC_MAX
1792 static char *ix86_target_string (int, int, const char *, const char *,
1793 const char *, bool);
1794 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1795 static void ix86_function_specific_save (struct cl_target_option *);
1796 static void ix86_function_specific_restore (struct cl_target_option *);
1797 static void ix86_function_specific_print (FILE *, int,
1798 struct cl_target_option *);
1799 static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
1800 static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
1801 static bool ix86_can_inline_p (tree, tree);
1802 static void ix86_set_current_function (tree);
1805 /* The svr4 ABI for the i386 says that records and unions are returned
1807 #ifndef DEFAULT_PCC_STRUCT_RETURN
1808 #define DEFAULT_PCC_STRUCT_RETURN 1
1811 /* Whether -mtune= or -march= were specified */
1812 static int ix86_tune_defaulted;
1813 static int ix86_arch_specified;
1815 /* Bit flags that specify the ISA we are compiling for. */
1816 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1818 /* A mask of ix86_isa_flags that includes bit X if X
1819 was set or cleared on the command line. */
1820 static int ix86_isa_flags_explicit;
1822 /* Define a set of ISAs which are available when a given ISA is
1823 enabled. MMX and SSE ISAs are handled separately. */
1825 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1826 #define OPTION_MASK_ISA_3DNOW_SET \
1827 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1829 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1830 #define OPTION_MASK_ISA_SSE2_SET \
1831 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1832 #define OPTION_MASK_ISA_SSE3_SET \
1833 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1834 #define OPTION_MASK_ISA_SSSE3_SET \
1835 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1836 #define OPTION_MASK_ISA_SSE4_1_SET \
1837 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1838 #define OPTION_MASK_ISA_SSE4_2_SET \
1839 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1841 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1843 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1845 #define OPTION_MASK_ISA_SSE4A_SET \
1846 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1847 #define OPTION_MASK_ISA_SSE5_SET \
1848 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1850 /* AES and PCLMUL need SSE2 because they use xmm registers */
1851 #define OPTION_MASK_ISA_AES_SET \
1852 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1853 #define OPTION_MASK_ISA_PCLMUL_SET \
1854 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1856 #define OPTION_MASK_ISA_ABM_SET \
1857 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1858 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1859 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1860 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1862 /* Define a set of ISAs which aren't available when a given ISA is
1863 disabled. MMX and SSE ISAs are handled separately. */
1865 #define OPTION_MASK_ISA_MMX_UNSET \
1866 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1867 #define OPTION_MASK_ISA_3DNOW_UNSET \
1868 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1869 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1871 #define OPTION_MASK_ISA_SSE_UNSET \
1872 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1873 #define OPTION_MASK_ISA_SSE2_UNSET \
1874 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1875 #define OPTION_MASK_ISA_SSE3_UNSET \
1876 (OPTION_MASK_ISA_SSE3 \
1877 | OPTION_MASK_ISA_SSSE3_UNSET \
1878 | OPTION_MASK_ISA_SSE4A_UNSET )
1879 #define OPTION_MASK_ISA_SSSE3_UNSET \
1880 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1881 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1882 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1883 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1885 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1887 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1889 #define OPTION_MASK_ISA_SSE4A_UNSET \
1890 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1891 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1892 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1893 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1894 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1895 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1896 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1897 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1899 /* Vectorization library interface and handlers. */
1900 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1901 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1902 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1904 /* Processor target table, indexed by processor number */
1907 const struct processor_costs *cost; /* Processor costs */
1908 const int align_loop; /* Default alignments. */
1909 const int align_loop_max_skip;
1910 const int align_jump;
1911 const int align_jump_max_skip;
1912 const int align_func;
1915 static const struct ptt processor_target_table[PROCESSOR_max] =
1917 {&i386_cost, 4, 3, 4, 3, 4},
1918 {&i486_cost, 16, 15, 16, 15, 16},
1919 {&pentium_cost, 16, 7, 16, 7, 16},
1920 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1921 {&geode_cost, 0, 0, 0, 0, 0},
1922 {&k6_cost, 32, 7, 32, 7, 32},
1923 {&athlon_cost, 16, 7, 16, 7, 16},
1924 {&pentium4_cost, 0, 0, 0, 0, 0},
1925 {&k8_cost, 16, 7, 16, 7, 16},
1926 {&nocona_cost, 0, 0, 0, 0, 0},
1927 {&core2_cost, 16, 10, 16, 10, 16},
1928 {&generic32_cost, 16, 7, 16, 7, 16},
1929 {&generic64_cost, 16, 10, 16, 10, 16},
1930 {&amdfam10_cost, 32, 24, 32, 7, 32}
1933 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1958 /* Implement TARGET_HANDLE_OPTION. */
1961 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1968 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1969 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1973 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1974 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1981 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1982 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1986 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1987 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1997 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1998 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2002 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2003 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2010 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2011 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2015 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2016 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2023 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2028 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2029 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2036 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2041 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2042 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2049 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2054 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2055 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2062 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2063 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2067 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2068 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2073 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2074 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2078 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2079 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2111 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2116 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2124 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2129 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2137 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2138 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2142 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2150 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2151 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2155 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2163 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2164 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2168 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2176 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2177 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2181 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2191 /* Return a string the documents the current -m options. The caller is
2192 responsible for freeing the string. */
2195 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2196 const char *fpmath, bool add_nl_p)
2198 struct ix86_target_opts
2200 const char *option; /* option string */
2201 int mask; /* isa mask options */
2204 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2205 preceding options while match those first. */
2206 static struct ix86_target_opts isa_opts[] =
2208 { "-m64", OPTION_MASK_ISA_64BIT },
2209 { "-msse5", OPTION_MASK_ISA_SSE5 },
2210 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2211 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2212 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2213 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2214 { "-msse3", OPTION_MASK_ISA_SSE3 },
2215 { "-msse2", OPTION_MASK_ISA_SSE2 },
2216 { "-msse", OPTION_MASK_ISA_SSE },
2217 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2218 { "-mmmx", OPTION_MASK_ISA_MMX },
2219 { "-mabm", OPTION_MASK_ISA_ABM },
2220 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2221 { "-maes", OPTION_MASK_ISA_AES },
2222 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2226 static struct ix86_target_opts flag_opts[] =
2228 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2229 { "-m80387", MASK_80387 },
2230 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2231 { "-malign-double", MASK_ALIGN_DOUBLE },
2232 { "-mcld", MASK_CLD },
2233 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2234 { "-mieee-fp", MASK_IEEE_FP },
2235 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2236 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2237 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2238 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2239 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2240 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2241 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2242 { "-mno-red-zone", MASK_NO_RED_ZONE },
2243 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2244 { "-mrecip", MASK_RECIP },
2245 { "-mrtd", MASK_RTD },
2246 { "-msseregparm", MASK_SSEREGPARM },
2247 { "-mstack-arg-probe", MASK_STACK_PROBE },
2248 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2251 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2252 + sizeof (flag_opts) / sizeof (flag_opts[0])
2256 char target_other[40];
2265 memset (opts, '\0', sizeof (opts));
2267 /* Add -march= option. */
2270 opts[num][0] = "-march=";
2271 opts[num++][1] = arch;
2274 /* Add -mtune= option. */
2277 opts[num][0] = "-mtune=";
2278 opts[num++][1] = tune;
2281 /* Pick out the options in isa options. */
2282 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2284 if ((isa & isa_opts[i].mask) != 0)
2286 opts[num++][0] = isa_opts[i].option;
2287 isa &= ~ isa_opts[i].mask;
2291 if (isa && add_nl_p)
2293 opts[num++][0] = isa_other;
2294 sprintf (isa_other, "(other isa: 0x%x)", isa);
2297 /* Add flag options. */
2298 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2300 if ((flags & flag_opts[i].mask) != 0)
2302 opts[num++][0] = flag_opts[i].option;
2303 flags &= ~ flag_opts[i].mask;
2307 if (flags && add_nl_p)
2309 opts[num++][0] = target_other;
2310 sprintf (target_other, "(other flags: 0x%x)", isa);
2313 /* Add -fpmath= option. */
2316 opts[num][0] = "-mfpmath=";
2317 opts[num++][1] = fpmath;
2324 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2326 /* Size the string. */
2328 sep_len = (add_nl_p) ? 3 : 1;
2329 for (i = 0; i < num; i++)
2332 for (j = 0; j < 2; j++)
2334 len += strlen (opts[i][j]);
2337 /* Build the string. */
2338 ret = ptr = (char *) xmalloc (len);
2341 for (i = 0; i < num; i++)
2345 for (j = 0; j < 2; j++)
2346 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2353 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2361 for (j = 0; j < 2; j++)
2364 memcpy (ptr, opts[i][j], len2[j]);
2366 line_len += len2[j];
2371 gcc_assert (ret + len >= ptr);
2376 /* Function that is callable from the debugger to print the current
2379 ix86_debug_options (void)
2381 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2382 ix86_arch_string, ix86_tune_string,
2383 ix86_fpmath_string, true);
2387 fprintf (stderr, "%s\n\n", opts);
2391 fprintf (stderr, "<no options>\n\n");
2396 /* Sometimes certain combinations of command options do not make
2397 sense on a particular target machine. You can define a macro
2398 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2399 defined, is executed once just after all the command options have
2402 Don't use this macro to turn on various extra optimizations for
2403 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2406 override_options (bool main_args_p)
2409 unsigned int ix86_arch_mask, ix86_tune_mask;
2414 /* Comes from final.c -- no real reason to change it. */
2415 #define MAX_CODE_ALIGN 16
2423 PTA_PREFETCH_SSE = 1 << 4,
2425 PTA_3DNOW_A = 1 << 6,
2429 PTA_POPCNT = 1 << 10,
2431 PTA_SSE4A = 1 << 12,
2432 PTA_NO_SAHF = 1 << 13,
2433 PTA_SSE4_1 = 1 << 14,
2434 PTA_SSE4_2 = 1 << 15,
2437 PTA_PCLMUL = 1 << 18
2442 const char *const name; /* processor name or nickname. */
2443 const enum processor_type processor;
2444 const unsigned /*enum pta_flags*/ flags;
2446 const processor_alias_table[] =
2448 {"i386", PROCESSOR_I386, 0},
2449 {"i486", PROCESSOR_I486, 0},
2450 {"i586", PROCESSOR_PENTIUM, 0},
2451 {"pentium", PROCESSOR_PENTIUM, 0},
2452 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2453 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2454 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2455 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2456 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2457 {"i686", PROCESSOR_PENTIUMPRO, 0},
2458 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2459 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2460 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2461 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2462 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2463 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2464 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2465 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2466 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2467 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2468 | PTA_CX16 | PTA_NO_SAHF)},
2469 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2470 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2473 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2474 |PTA_PREFETCH_SSE)},
2475 {"k6", PROCESSOR_K6, PTA_MMX},
2476 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2477 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2478 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2479 | PTA_PREFETCH_SSE)},
2480 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2481 | PTA_PREFETCH_SSE)},
2482 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2484 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2486 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2488 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2489 | PTA_MMX | PTA_SSE | PTA_SSE2
2491 {"k8", PROCESSOR_K8, (PTA_64BIT
2492 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2493 | PTA_SSE | PTA_SSE2
2495 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2496 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2497 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2499 {"opteron", PROCESSOR_K8, (PTA_64BIT
2500 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2501 | PTA_SSE | PTA_SSE2
2503 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2504 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2505 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2507 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2508 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2509 | PTA_SSE | PTA_SSE2
2511 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2512 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2513 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2515 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2516 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2517 | PTA_SSE | PTA_SSE2
2519 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2520 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2521 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2523 | PTA_CX16 | PTA_ABM)},
2524 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2525 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2526 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2528 | PTA_CX16 | PTA_ABM)},
2529 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2530 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2533 int const pta_size = ARRAY_SIZE (processor_alias_table);
2535 /* Set up prefix/suffix so the error messages refer to either the command
2536 line argument, or the attribute(option). */
2545 prefix = "option(\"";
2550 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2551 SUBTARGET_OVERRIDE_OPTIONS;
2554 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2555 SUBSUBTARGET_OVERRIDE_OPTIONS;
2558 /* -fPIC is the default for x86_64. */
2559 if (TARGET_MACHO && TARGET_64BIT)
2562 /* Set the default values for switches whose default depends on TARGET_64BIT
2563 in case they weren't overwritten by command line options. */
2566 /* Mach-O doesn't support omitting the frame pointer for now. */
2567 if (flag_omit_frame_pointer == 2)
2568 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2569 if (flag_asynchronous_unwind_tables == 2)
2570 flag_asynchronous_unwind_tables = 1;
2571 if (flag_pcc_struct_return == 2)
2572 flag_pcc_struct_return = 0;
2576 if (flag_omit_frame_pointer == 2)
2577 flag_omit_frame_pointer = 0;
2578 if (flag_asynchronous_unwind_tables == 2)
2579 flag_asynchronous_unwind_tables = 0;
2580 if (flag_pcc_struct_return == 2)
2581 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2584 /* Need to check -mtune=generic first. */
2585 if (ix86_tune_string)
2587 if (!strcmp (ix86_tune_string, "generic")
2588 || !strcmp (ix86_tune_string, "i686")
2589 /* As special support for cross compilers we read -mtune=native
2590 as -mtune=generic. With native compilers we won't see the
2591 -mtune=native, as it was changed by the driver. */
2592 || !strcmp (ix86_tune_string, "native"))
2595 ix86_tune_string = "generic64";
2597 ix86_tune_string = "generic32";
2599 /* If this call is for setting the option attribute, allow the
2600 generic32/generic64 that was previously set. */
2601 else if (!main_args_p
2602 && (!strcmp (ix86_tune_string, "generic32")
2603 || !strcmp (ix86_tune_string, "generic64")))
2605 else if (!strncmp (ix86_tune_string, "generic", 7))
2606 error ("bad value (%s) for %stune=%s %s",
2607 ix86_tune_string, prefix, suffix, sw);
2611 if (ix86_arch_string)
2612 ix86_tune_string = ix86_arch_string;
2613 if (!ix86_tune_string)
2615 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2616 ix86_tune_defaulted = 1;
2619 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2620 need to use a sensible tune option. */
2621 if (!strcmp (ix86_tune_string, "generic")
2622 || !strcmp (ix86_tune_string, "x86-64")
2623 || !strcmp (ix86_tune_string, "i686"))
2626 ix86_tune_string = "generic64";
2628 ix86_tune_string = "generic32";
2631 if (ix86_stringop_string)
2633 if (!strcmp (ix86_stringop_string, "rep_byte"))
2634 stringop_alg = rep_prefix_1_byte;
2635 else if (!strcmp (ix86_stringop_string, "libcall"))
2636 stringop_alg = libcall;
2637 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2638 stringop_alg = rep_prefix_4_byte;
2639 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2640 stringop_alg = rep_prefix_8_byte;
2641 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2642 stringop_alg = loop_1_byte;
2643 else if (!strcmp (ix86_stringop_string, "loop"))
2644 stringop_alg = loop;
2645 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2646 stringop_alg = unrolled_loop;
2648 error ("bad value (%s) for %sstringop-strategy=%s %s",
2649 ix86_stringop_string, prefix, suffix, sw);
2651 if (!strcmp (ix86_tune_string, "x86-64"))
2652 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2653 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2654 prefix, suffix, prefix, suffix, prefix, suffix);
2656 if (!ix86_arch_string)
2657 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2659 ix86_arch_specified = 1;
2661 if (!strcmp (ix86_arch_string, "generic"))
2662 error ("generic CPU can be used only for %stune=%s %s",
2663 prefix, suffix, sw);
2664 if (!strncmp (ix86_arch_string, "generic", 7))
2665 error ("bad value (%s) for %sarch=%s %s",
2666 ix86_arch_string, prefix, suffix, sw);
2668 if (ix86_cmodel_string != 0)
2670 if (!strcmp (ix86_cmodel_string, "small"))
2671 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2672 else if (!strcmp (ix86_cmodel_string, "medium"))
2673 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2674 else if (!strcmp (ix86_cmodel_string, "large"))
2675 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2677 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2678 else if (!strcmp (ix86_cmodel_string, "32"))
2679 ix86_cmodel = CM_32;
2680 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2681 ix86_cmodel = CM_KERNEL;
2683 error ("bad value (%s) for %scmodel=%s %s",
2684 ix86_cmodel_string, prefix, suffix, sw);
2688 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2689 use of rip-relative addressing. This eliminates fixups that
2690 would otherwise be needed if this object is to be placed in a
2691 DLL, and is essentially just as efficient as direct addressing. */
2692 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2693 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2694 else if (TARGET_64BIT)
2695 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2697 ix86_cmodel = CM_32;
2699 if (ix86_asm_string != 0)
2702 && !strcmp (ix86_asm_string, "intel"))
2703 ix86_asm_dialect = ASM_INTEL;
2704 else if (!strcmp (ix86_asm_string, "att"))
2705 ix86_asm_dialect = ASM_ATT;
2707 error ("bad value (%s) for %sasm=%s %s",
2708 ix86_asm_string, prefix, suffix, sw);
2710 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2711 error ("code model %qs not supported in the %s bit mode",
2712 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2713 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2714 sorry ("%i-bit mode not compiled in",
2715 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2717 for (i = 0; i < pta_size; i++)
2718 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2720 ix86_arch = processor_alias_table[i].processor;
2721 /* Default cpu tuning to the architecture. */
2722 ix86_tune = ix86_arch;
2724 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2725 error ("CPU you selected does not support x86-64 "
2728 if (processor_alias_table[i].flags & PTA_MMX
2729 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2730 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2731 if (processor_alias_table[i].flags & PTA_3DNOW
2732 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2733 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2734 if (processor_alias_table[i].flags & PTA_3DNOW_A
2735 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2736 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2737 if (processor_alias_table[i].flags & PTA_SSE
2738 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2739 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2740 if (processor_alias_table[i].flags & PTA_SSE2
2741 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2742 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2743 if (processor_alias_table[i].flags & PTA_SSE3
2744 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2745 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2746 if (processor_alias_table[i].flags & PTA_SSSE3
2747 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2748 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2749 if (processor_alias_table[i].flags & PTA_SSE4_1
2750 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2751 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2752 if (processor_alias_table[i].flags & PTA_SSE4_2
2753 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2754 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2755 if (processor_alias_table[i].flags & PTA_SSE4A
2756 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2757 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2758 if (processor_alias_table[i].flags & PTA_SSE5
2759 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2760 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2761 if (processor_alias_table[i].flags & PTA_ABM
2762 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2763 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2764 if (processor_alias_table[i].flags & PTA_CX16
2765 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2766 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2767 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2768 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2769 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2770 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2771 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2772 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2773 if (processor_alias_table[i].flags & PTA_AES
2774 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2775 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2776 if (processor_alias_table[i].flags & PTA_PCLMUL
2777 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2778 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2779 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2780 x86_prefetch_sse = true;
2786 error ("bad value (%s) for %sarch=%s %s",
2787 ix86_arch_string, prefix, suffix, sw);
2789 ix86_arch_mask = 1u << ix86_arch;
2790 for (i = 0; i < X86_ARCH_LAST; ++i)
2791 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2793 for (i = 0; i < pta_size; i++)
2794 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2796 ix86_tune = processor_alias_table[i].processor;
2797 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2799 if (ix86_tune_defaulted)
2801 ix86_tune_string = "x86-64";
2802 for (i = 0; i < pta_size; i++)
2803 if (! strcmp (ix86_tune_string,
2804 processor_alias_table[i].name))
2806 ix86_tune = processor_alias_table[i].processor;
2809 error ("CPU you selected does not support x86-64 "
2812 /* Intel CPUs have always interpreted SSE prefetch instructions as
2813 NOPs; so, we can enable SSE prefetch instructions even when
2814 -mtune (rather than -march) points us to a processor that has them.
2815 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2816 higher processors. */
2818 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2819 x86_prefetch_sse = true;
2823 error ("bad value (%s) for %stune=%s %s",
2824 ix86_tune_string, prefix, suffix, sw);
2826 ix86_tune_mask = 1u << ix86_tune;
2827 for (i = 0; i < X86_TUNE_LAST; ++i)
2828 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2831 ix86_cost = &size_cost;
2833 ix86_cost = processor_target_table[ix86_tune].cost;
2835 /* Arrange to set up i386_stack_locals for all functions. */
2836 init_machine_status = ix86_init_machine_status;
2838 /* Validate -mregparm= value. */
2839 if (ix86_regparm_string)
2842 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2843 i = atoi (ix86_regparm_string);
2844 if (i < 0 || i > REGPARM_MAX)
2845 error ("%sregparm=%d%s is not between 0 and %d",
2846 prefix, i, suffix, REGPARM_MAX);
2851 ix86_regparm = REGPARM_MAX;
2853 /* If the user has provided any of the -malign-* options,
2854 warn and use that value only if -falign-* is not set.
2855 Remove this code in GCC 3.2 or later. */
2856 if (ix86_align_loops_string)
2858 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2859 prefix, suffix, prefix, suffix);
2860 if (align_loops == 0)
2862 i = atoi (ix86_align_loops_string);
2863 if (i < 0 || i > MAX_CODE_ALIGN)
2864 error ("%salign-loops=%d%s is not between 0 and %d",
2865 prefix, i, suffix, MAX_CODE_ALIGN);
2867 align_loops = 1 << i;
2871 if (ix86_align_jumps_string)
2873 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2874 prefix, suffix, prefix, suffix);
2875 if (align_jumps == 0)
2877 i = atoi (ix86_align_jumps_string);
2878 if (i < 0 || i > MAX_CODE_ALIGN)
2879 error ("%salign-loops=%d%s is not between 0 and %d",
2880 prefix, i, suffix, MAX_CODE_ALIGN);
2882 align_jumps = 1 << i;
2886 if (ix86_align_funcs_string)
2888 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2889 prefix, suffix, prefix, suffix);
2890 if (align_functions == 0)
2892 i = atoi (ix86_align_funcs_string);
2893 if (i < 0 || i > MAX_CODE_ALIGN)
2894 error ("%salign-loops=%d%s is not between 0 and %d",
2895 prefix, i, suffix, MAX_CODE_ALIGN);
2897 align_functions = 1 << i;
2901 /* Default align_* from the processor table. */
2902 if (align_loops == 0)
2904 align_loops = processor_target_table[ix86_tune].align_loop;
2905 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2907 if (align_jumps == 0)
2909 align_jumps = processor_target_table[ix86_tune].align_jump;
2910 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2912 if (align_functions == 0)
2914 align_functions = processor_target_table[ix86_tune].align_func;
2917 /* Validate -mbranch-cost= value, or provide default. */
2918 ix86_branch_cost = ix86_cost->branch_cost;
2919 if (ix86_branch_cost_string)
2921 i = atoi (ix86_branch_cost_string);
2923 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2925 ix86_branch_cost = i;
2927 if (ix86_section_threshold_string)
2929 i = atoi (ix86_section_threshold_string);
2931 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2933 ix86_section_threshold = i;
2936 if (ix86_tls_dialect_string)
2938 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2939 ix86_tls_dialect = TLS_DIALECT_GNU;
2940 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2941 ix86_tls_dialect = TLS_DIALECT_GNU2;
2942 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2943 ix86_tls_dialect = TLS_DIALECT_SUN;
2945 error ("bad value (%s) for %stls-dialect=%s %s",
2946 ix86_tls_dialect_string, prefix, suffix, sw);
2949 if (ix87_precision_string)
2951 i = atoi (ix87_precision_string);
2952 if (i != 32 && i != 64 && i != 80)
2953 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2958 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2960 /* Enable by default the SSE and MMX builtins. Do allow the user to
2961 explicitly disable any of these. In particular, disabling SSE and
2962 MMX for kernel code is extremely useful. */
2963 if (!ix86_arch_specified)
2965 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2966 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2969 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
2973 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2975 if (!ix86_arch_specified)
2977 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2979 /* i386 ABI does not specify red zone. It still makes sense to use it
2980 when programmer takes care to stack from being destroyed. */
2981 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2982 target_flags |= MASK_NO_RED_ZONE;
2985 /* Keep nonleaf frame pointers. */
2986 if (flag_omit_frame_pointer)
2987 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2988 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2989 flag_omit_frame_pointer = 1;
2991 /* If we're doing fast math, we don't care about comparison order
2992 wrt NaNs. This lets us use a shorter comparison sequence. */
2993 if (flag_finite_math_only)
2994 target_flags &= ~MASK_IEEE_FP;
2996 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2997 since the insns won't need emulation. */
2998 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2999 target_flags &= ~MASK_NO_FANCY_MATH_387;
3001 /* Likewise, if the target doesn't have a 387, or we've specified
3002 software floating point, don't use 387 inline intrinsics. */
3004 target_flags |= MASK_NO_FANCY_MATH_387;
3006 /* Turn on MMX builtins for -msse. */
3009 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3010 x86_prefetch_sse = true;
3013 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3014 if (TARGET_SSE4_2 || TARGET_ABM)
3015 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3017 /* Validate -mpreferred-stack-boundary= value, or provide default.
3018 The default of 128 bits is for Pentium III's SSE __m128. We can't
3019 change it because of optimize_size. Otherwise, we can't mix object
3020 files compiled with -Os and -On. */
3021 ix86_preferred_stack_boundary = 128;
3022 if (ix86_preferred_stack_boundary_string)
3024 i = atoi (ix86_preferred_stack_boundary_string);
3025 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3026 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3027 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3029 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3032 /* Accept -msseregparm only if at least SSE support is enabled. */
3033 if (TARGET_SSEREGPARM
3035 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3037 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3038 if (ix86_fpmath_string != 0)
3040 if (! strcmp (ix86_fpmath_string, "387"))
3041 ix86_fpmath = FPMATH_387;
3042 else if (! strcmp (ix86_fpmath_string, "sse"))
3046 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3047 ix86_fpmath = FPMATH_387;
3050 ix86_fpmath = FPMATH_SSE;
3052 else if (! strcmp (ix86_fpmath_string, "387,sse")
3053 || ! strcmp (ix86_fpmath_string, "387+sse")
3054 || ! strcmp (ix86_fpmath_string, "sse,387")
3055 || ! strcmp (ix86_fpmath_string, "sse+387")
3056 || ! strcmp (ix86_fpmath_string, "both"))
3060 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3061 ix86_fpmath = FPMATH_387;
3063 else if (!TARGET_80387)
3065 warning (0, "387 instruction set disabled, using SSE arithmetics");
3066 ix86_fpmath = FPMATH_SSE;
3069 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3072 error ("bad value (%s) for %sfpmath=%s %s",
3073 ix86_fpmath_string, prefix, suffix, sw);
3076 /* If the i387 is disabled, then do not return values in it. */
3078 target_flags &= ~MASK_FLOAT_RETURNS;
3080 /* Use external vectorized library in vectorizing intrinsics. */
3081 if (ix86_veclibabi_string)
3083 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3084 ix86_veclib_handler = ix86_veclibabi_svml;
3085 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3086 ix86_veclib_handler = ix86_veclibabi_acml;
3088 error ("unknown vectorization library ABI type (%s) for "
3089 "%sveclibabi=%s %s", ix86_veclibabi_string,
3090 prefix, suffix, sw);
3093 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3094 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3096 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3098 /* ??? Unwind info is not correct around the CFG unless either a frame
3099 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3100 unwind info generation to be aware of the CFG and propagating states
3102 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3103 || flag_exceptions || flag_non_call_exceptions)
3104 && flag_omit_frame_pointer
3105 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3107 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3108 warning (0, "unwind tables currently require either a frame pointer "
3109 "or %saccumulate-outgoing-args%s for correctness",
3111 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3114 /* If stack probes are required, the space used for large function
3115 arguments on the stack must also be probed, so enable
3116 -maccumulate-outgoing-args so this happens in the prologue. */
3117 if (TARGET_STACK_PROBE
3118 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3120 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3121 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3122 "for correctness", prefix, suffix);
3123 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3128 /* For sane SSE instruction set generation we need fcomi instruction.
3129 It is safe to enable all CMOVE instructions. */
3133 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3136 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3137 p = strchr (internal_label_prefix, 'X');
3138 internal_label_prefix_len = p - internal_label_prefix;
3142 /* When scheduling description is not available, disable scheduler pass
3143 so it won't slow down the compilation and make x87 code slower. */
3144 if (!TARGET_SCHEDULE)
3145 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3147 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3148 set_param_value ("simultaneous-prefetches",
3149 ix86_cost->simultaneous_prefetches);
3150 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3151 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3152 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3153 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3154 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3155 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3157 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3158 can be optimized to ap = __builtin_next_arg (0). */
3160 targetm.expand_builtin_va_start = NULL;
3164 ix86_gen_leave = gen_leave_rex64;
3165 ix86_gen_pop1 = gen_popdi1;
3166 ix86_gen_add3 = gen_adddi3;
3167 ix86_gen_sub3 = gen_subdi3;
3168 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3169 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3170 ix86_gen_monitor = gen_sse3_monitor64;
3174 ix86_gen_leave = gen_leave;
3175 ix86_gen_pop1 = gen_popsi1;
3176 ix86_gen_add3 = gen_addsi3;
3177 ix86_gen_sub3 = gen_subsi3;
3178 ix86_gen_sub3_carry = gen_subsi3_carry;
3179 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3180 ix86_gen_monitor = gen_sse3_monitor;
3184 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3186 target_flags |= MASK_CLD & ~target_flags_explicit;
3189 /* Save the initial options in case the user does function specific options */
3191 target_option_default_node = target_option_current_node
3192 = build_target_option_node ();
3195 /* Save the current options */
3198 ix86_function_specific_save (struct cl_target_option *ptr)
3200 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3201 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3202 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3203 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3205 ptr->arch = ix86_arch;
3206 ptr->tune = ix86_tune;
3207 ptr->fpmath = ix86_fpmath;
3208 ptr->branch_cost = ix86_branch_cost;
3209 ptr->tune_defaulted = ix86_tune_defaulted;
3210 ptr->arch_specified = ix86_arch_specified;
3211 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3212 ptr->target_flags_explicit = target_flags_explicit;
3215 /* Restore the current options */
3218 ix86_function_specific_restore (struct cl_target_option *ptr)
3220 enum processor_type old_tune = ix86_tune;
3221 enum processor_type old_arch = ix86_arch;
3222 unsigned int ix86_arch_mask, ix86_tune_mask;
3225 ix86_arch = ptr->arch;
3226 ix86_tune = ptr->tune;
3227 ix86_fpmath = ptr->fpmath;
3228 ix86_branch_cost = ptr->branch_cost;
3229 ix86_tune_defaulted = ptr->tune_defaulted;
3230 ix86_arch_specified = ptr->arch_specified;
3231 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3232 target_flags_explicit = ptr->target_flags_explicit;
3234 /* Recreate the arch feature tests if the arch changed */
3235 if (old_arch != ix86_arch)
3237 ix86_arch_mask = 1u << ix86_arch;
3238 for (i = 0; i < X86_ARCH_LAST; ++i)
3239 ix86_arch_features[i]
3240 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3243 /* Recreate the tune optimization tests */
3244 if (old_tune != ix86_tune)
3246 ix86_tune_mask = 1u << ix86_tune;
3247 for (i = 0; i < X86_TUNE_LAST; ++i)
3248 ix86_tune_features[i]
3249 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3253 /* Print the current options */
3256 ix86_function_specific_print (FILE *file, int indent,
3257 struct cl_target_option *ptr)
3260 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3261 NULL, NULL, NULL, false);
3263 fprintf (file, "%*sarch = %d (%s)\n",
3266 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3267 ? cpu_names[ptr->arch]
3270 fprintf (file, "%*stune = %d (%s)\n",
3273 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3274 ? cpu_names[ptr->tune]
3277 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3278 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3279 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3280 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3284 fprintf (file, "%*s%s\n", indent, "", target_string);
3285 free (target_string);
3290 /* Inner function to process the attribute((option(...))), take an argument and
3291 set the current options from the argument. If we have a list, recursively go
3295 ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
3300 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3301 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3302 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3303 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3318 enum ix86_opt_type type;
3323 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3324 IX86_ATTR_ISA ("abm", OPT_mabm),
3325 IX86_ATTR_ISA ("aes", OPT_maes),
3326 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3327 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3328 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3329 IX86_ATTR_ISA ("sse", OPT_msse),
3330 IX86_ATTR_ISA ("sse2", OPT_msse2),
3331 IX86_ATTR_ISA ("sse3", OPT_msse3),
3332 IX86_ATTR_ISA ("sse4", OPT_msse4),
3333 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3334 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3335 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3336 IX86_ATTR_ISA ("sse5", OPT_msse5),
3337 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3339 /* string options */
3340 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3341 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3342 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3345 IX86_ATTR_YES ("cld",
3349 IX86_ATTR_NO ("fancy-math-387",
3350 OPT_mfancy_math_387,
3351 MASK_NO_FANCY_MATH_387),
3353 IX86_ATTR_NO ("fused-madd",
3355 MASK_NO_FUSED_MADD),
3357 IX86_ATTR_YES ("ieee-fp",
3361 IX86_ATTR_YES ("inline-all-stringops",
3362 OPT_minline_all_stringops,
3363 MASK_INLINE_ALL_STRINGOPS),
3365 IX86_ATTR_YES ("inline-stringops-dynamically",
3366 OPT_minline_stringops_dynamically,
3367 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3369 IX86_ATTR_NO ("align-stringops",
3370 OPT_mno_align_stringops,
3371 MASK_NO_ALIGN_STRINGOPS),
3373 IX86_ATTR_YES ("recip",
3379 /* If this is a list, recurse to get the options. */
3380 if (TREE_CODE (args) == TREE_LIST)
3384 for (; args; args = TREE_CHAIN (args))
3385 if (TREE_VALUE (args)
3386 && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
3392 else if (TREE_CODE (args) != STRING_CST)
3395 /* Handle multiple arguments separated by commas. */
3396 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3398 while (next_optstr && *next_optstr != '\0')
3400 char *p = next_optstr;
3402 char *comma = strchr (next_optstr, ',');
3403 const char *opt_string;
3404 size_t len, opt_len;
3409 enum ix86_opt_type type = ix86_opt_unknown;
3415 len = comma - next_optstr;
3416 next_optstr = comma + 1;
3424 /* Recognize no-xxx. */
3425 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3434 /* Find the option. */
3437 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3439 type = attrs[i].type;
3440 opt_len = attrs[i].len;
3441 if (ch == attrs[i].string[0]
3442 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3443 && memcmp (p, attrs[i].string, opt_len) == 0)
3446 mask = attrs[i].mask;
3447 opt_string = attrs[i].string;
3452 /* Process the option. */
3455 error ("attribute(option(\"%s\")) is unknown", orig_p);
3459 else if (type == ix86_opt_isa)
3460 ix86_handle_option (opt, p, opt_set_p);
3462 else if (type == ix86_opt_yes || type == ix86_opt_no)
3464 if (type == ix86_opt_no)
3465 opt_set_p = !opt_set_p;
3468 target_flags |= mask;
3470 target_flags &= ~mask;
3473 else if (type == ix86_opt_str)
3477 error ("option(\"%s\") was already specified", opt_string);
3481 p_strings[opt] = xstrdup (p + opt_len);
3491 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3494 ix86_valid_option_attribute_tree (tree args)
3496 const char *orig_arch_string = ix86_arch_string;
3497 const char *orig_tune_string = ix86_tune_string;
3498 const char *orig_fpmath_string = ix86_fpmath_string;
3499 int orig_tune_defaulted = ix86_tune_defaulted;
3500 int orig_arch_specified = ix86_arch_specified;
3501 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3504 struct cl_target_option *def
3505 = TREE_TARGET_OPTION (target_option_default_node);
3507 /* Process each of the options on the chain. */
3508 if (! ix86_valid_option_attribute_inner_p (args, option_strings))
3511 /* If the changed options are different from the default, rerun override_options,
3512 and then save the options away. The string options are are attribute options,
3513 and will be undone when we copy the save structure. */
3514 if (ix86_isa_flags != def->ix86_isa_flags
3515 || target_flags != def->target_flags
3516 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3517 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3518 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3520 /* If we are using the default tune= or arch=, undo the string assigned,
3521 and use the default. */
3522 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3523 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3524 else if (!orig_arch_specified)
3525 ix86_arch_string = NULL;
3527 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3528 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3529 else if (orig_tune_defaulted)
3530 ix86_tune_string = NULL;
3532 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3533 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3534 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3535 else if (!TARGET_64BIT && TARGET_SSE)
3536 ix86_fpmath_string = "sse,387";
3538 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3539 override_options (false);
3541 /* Save the current options unless we are validating options for
3543 t = build_target_option_node ();
3545 ix86_arch_string = orig_arch_string;
3546 ix86_tune_string = orig_tune_string;
3547 ix86_fpmath_string = orig_fpmath_string;
3549 /* Free up memory allocated to hold the strings */
3550 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3551 if (option_strings[i])
3552 free (option_strings[i]);
3558 /* Hook to validate attribute((option("string"))). */
3561 ix86_valid_option_attribute_p (tree fndecl,
3562 tree ARG_UNUSED (name),
3564 int ARG_UNUSED (flags))
3566 struct cl_target_option cur_opts;
3570 cl_target_option_save (&cur_opts);
3571 new_opts = ix86_valid_option_attribute_tree (args);
3576 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
3578 cl_target_option_restore (&cur_opts);
3583 /* Hook to determine if one function can safely inline another. */
3586 ix86_can_inline_p (tree caller, tree callee)
3589 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3590 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3592 /* If callee has no option attributes, then it is ok to inline. */
3596 /* If caller has no option attributes, but callee does then it is not ok to
3598 else if (!caller_tree)
3603 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3604 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3606 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3607 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3609 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3610 != callee_opts->ix86_isa_flags)
3613 /* See if we have the same non-isa options. */
3614 else if (caller_opts->target_flags != callee_opts->target_flags)
3617 /* See if arch, tune, etc. are the same. */
3618 else if (caller_opts->arch != callee_opts->arch)
3621 else if (caller_opts->tune != callee_opts->tune)
3624 else if (caller_opts->fpmath != callee_opts->fpmath)
3627 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3638 /* Remember the last target of ix86_set_current_function. */
3639 static GTY(()) tree ix86_previous_fndecl;
3641 /* Establish appropriate back-end context for processing the function
3642 FNDECL. The argument might be NULL to indicate processing at top
3643 level, outside of any function scope. */
3645 ix86_set_current_function (tree fndecl)
3647 /* Only change the context if the function changes. This hook is called
3648 several times in the course of compiling a function, and we don't want to
3649 slow things down too much or call target_reinit when it isn't safe. */
3650 if (fndecl && fndecl != ix86_previous_fndecl)
3652 tree old_tree = (ix86_previous_fndecl
3653 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3656 tree new_tree = (fndecl
3657 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3660 ix86_previous_fndecl = fndecl;
3661 if (old_tree == new_tree)
3666 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3672 struct cl_target_option *def
3673 = TREE_TARGET_OPTION (target_option_current_node);
3675 cl_target_option_restore (def);
3682 /* Return true if this goes in large data/bss. */
3685 ix86_in_large_data_p (tree exp)
3687 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3690 /* Functions are never large data. */
3691 if (TREE_CODE (exp) == FUNCTION_DECL)
3694 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3696 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3697 if (strcmp (section, ".ldata") == 0
3698 || strcmp (section, ".lbss") == 0)
3704 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3706 /* If this is an incomplete type with size 0, then we can't put it
3707 in data because it might be too big when completed. */
3708 if (!size || size > ix86_section_threshold)
3715 /* Switch to the appropriate section for output of DECL.
3716 DECL is either a `VAR_DECL' node or a constant of some sort.
3717 RELOC indicates whether forming the initial value of DECL requires
3718 link-time relocations. */
3720 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3724 x86_64_elf_select_section (tree decl, int reloc,
3725 unsigned HOST_WIDE_INT align)
3727 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3728 && ix86_in_large_data_p (decl))
3730 const char *sname = NULL;
3731 unsigned int flags = SECTION_WRITE;
3732 switch (categorize_decl_for_section (decl, reloc))
3737 case SECCAT_DATA_REL:
3738 sname = ".ldata.rel";
3740 case SECCAT_DATA_REL_LOCAL:
3741 sname = ".ldata.rel.local";
3743 case SECCAT_DATA_REL_RO:
3744 sname = ".ldata.rel.ro";
3746 case SECCAT_DATA_REL_RO_LOCAL:
3747 sname = ".ldata.rel.ro.local";
3751 flags |= SECTION_BSS;
3754 case SECCAT_RODATA_MERGE_STR:
3755 case SECCAT_RODATA_MERGE_STR_INIT:
3756 case SECCAT_RODATA_MERGE_CONST:
3760 case SECCAT_SRODATA:
3767 /* We don't split these for medium model. Place them into
3768 default sections and hope for best. */
3770 case SECCAT_EMUTLS_VAR:
3771 case SECCAT_EMUTLS_TMPL:
3776 /* We might get called with string constants, but get_named_section
3777 doesn't like them as they are not DECLs. Also, we need to set
3778 flags in that case. */
3780 return get_section (sname, flags, NULL);
3781 return get_named_section (decl, sname, reloc);
3784 return default_elf_select_section (decl, reloc, align);
3787 /* Build up a unique section name, expressed as a
3788 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3789 RELOC indicates whether the initial value of EXP requires
3790 link-time relocations. */
3792 static void ATTRIBUTE_UNUSED
3793 x86_64_elf_unique_section (tree decl, int reloc)
3795 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3796 && ix86_in_large_data_p (decl))
3798 const char *prefix = NULL;
3799 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3800 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3802 switch (categorize_decl_for_section (decl, reloc))
3805 case SECCAT_DATA_REL:
3806 case SECCAT_DATA_REL_LOCAL:
3807 case SECCAT_DATA_REL_RO:
3808 case SECCAT_DATA_REL_RO_LOCAL:
3809 prefix = one_only ? ".ld" : ".ldata";
3812 prefix = one_only ? ".lb" : ".lbss";
3815 case SECCAT_RODATA_MERGE_STR:
3816 case SECCAT_RODATA_MERGE_STR_INIT:
3817 case SECCAT_RODATA_MERGE_CONST:
3818 prefix = one_only ? ".lr" : ".lrodata";
3820 case SECCAT_SRODATA:
3827 /* We don't split these for medium model. Place them into
3828 default sections and hope for best. */
3830 case SECCAT_EMUTLS_VAR:
3831 prefix = targetm.emutls.var_section;
3833 case SECCAT_EMUTLS_TMPL:
3834 prefix = targetm.emutls.tmpl_section;
3839 const char *name, *linkonce;
3842 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3843 name = targetm.strip_name_encoding (name);
3845 /* If we're using one_only, then there needs to be a .gnu.linkonce
3846 prefix to the section name. */
3847 linkonce = one_only ? ".gnu.linkonce" : "";
3849 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3851 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3855 default_unique_section (decl, reloc);
3858 #ifdef COMMON_ASM_OP
3859 /* This says how to output assembler code to declare an
3860 uninitialized external linkage data object.
3862 For medium model x86-64 we need to use .largecomm opcode for
3865 x86_elf_aligned_common (FILE *file,
3866 const char *name, unsigned HOST_WIDE_INT size,
3869 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3870 && size > (unsigned int)ix86_section_threshold)
3871 fprintf (file, ".largecomm\t");
3873 fprintf (file, "%s", COMMON_ASM_OP);
3874 assemble_name (file, name);
3875 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3876 size, align / BITS_PER_UNIT);
3880 /* Utility function for targets to use in implementing
3881 ASM_OUTPUT_ALIGNED_BSS. */
3884 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3885 const char *name, unsigned HOST_WIDE_INT size,
3888 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3889 && size > (unsigned int)ix86_section_threshold)
3890 switch_to_section (get_named_section (decl, ".lbss", 0));
3892 switch_to_section (bss_section);
3893 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3894 #ifdef ASM_DECLARE_OBJECT_NAME
3895 last_assemble_variable_decl = decl;
3896 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3898 /* Standard thing is just output label for the object. */
3899 ASM_OUTPUT_LABEL (file, name);
3900 #endif /* ASM_DECLARE_OBJECT_NAME */
3901 ASM_OUTPUT_SKIP (file, size ? size : 1);
3905 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3907 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3908 make the problem with not enough registers even worse. */
3909 #ifdef INSN_SCHEDULING
3911 flag_schedule_insns = 0;
3915 /* The Darwin libraries never set errno, so we might as well
3916 avoid calling them when that's the only reason we would. */
3917 flag_errno_math = 0;
3919 /* The default values of these switches depend on the TARGET_64BIT
3920 that is not known at this moment. Mark these values with 2 and
3921 let user the to override these. In case there is no command line option
3922 specifying them, we will set the defaults in override_options. */
3924 flag_omit_frame_pointer = 2;
3925 flag_pcc_struct_return = 2;
3926 flag_asynchronous_unwind_tables = 2;
3927 flag_vect_cost_model = 1;
3928 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3929 SUBTARGET_OPTIMIZATION_OPTIONS;
3933 /* Decide whether we can make a sibling call to a function. DECL is the
3934 declaration of the function being targeted by the call and EXP is the
3935 CALL_EXPR representing the call. */
3938 ix86_function_ok_for_sibcall (tree decl, tree exp)
3943 /* If we are generating position-independent code, we cannot sibcall
3944 optimize any indirect call, or a direct call to a global function,
3945 as the PLT requires %ebx be live. */
3946 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3953 func = TREE_TYPE (CALL_EXPR_FN (exp));
3954 if (POINTER_TYPE_P (func))
3955 func = TREE_TYPE (func);
3958 /* Check that the return value locations are the same. Like
3959 if we are returning floats on the 80387 register stack, we cannot
3960 make a sibcall from a function that doesn't return a float to a
3961 function that does or, conversely, from a function that does return
3962 a float to a function that doesn't; the necessary stack adjustment
3963 would not be executed. This is also the place we notice
3964 differences in the return value ABI. Note that it is ok for one
3965 of the functions to have void return type as long as the return
3966 value of the other is passed in a register. */
3967 a = ix86_function_value (TREE_TYPE (exp), func, false);
3968 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3970 if (STACK_REG_P (a) || STACK_REG_P (b))
3972 if (!rtx_equal_p (a, b))
3975 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3977 else if (!rtx_equal_p (a, b))
3980 /* If this call is indirect, we'll need to be able to use a call-clobbered
3981 register for the address of the target function. Make sure that all
3982 such registers are not used for passing parameters. */
3983 if (!decl && !TARGET_64BIT)
3987 /* We're looking at the CALL_EXPR, we need the type of the function. */
3988 type = CALL_EXPR_FN (exp); /* pointer expression */
3989 type = TREE_TYPE (type); /* pointer type */
3990 type = TREE_TYPE (type); /* function type */
3992 if (ix86_function_regparm (type, NULL) >= 3)
3994 /* ??? Need to count the actual number of registers to be used,
3995 not the possible number of registers. Fix later. */
4000 /* Dllimport'd functions are also called indirectly. */
4001 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4002 && decl && DECL_DLLIMPORT_P (decl)
4003 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4006 /* If we forced aligned the stack, then sibcalling would unalign the
4007 stack, which may break the called function. */
4008 if (cfun->machine->force_align_arg_pointer)
4011 /* Otherwise okay. That also includes certain types of indirect calls. */
4015 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4016 calling convention attributes;
4017 arguments as in struct attribute_spec.handler. */
4020 ix86_handle_cconv_attribute (tree *node, tree name,
4022 int flags ATTRIBUTE_UNUSED,
4025 if (TREE_CODE (*node) != FUNCTION_TYPE
4026 && TREE_CODE (*node) != METHOD_TYPE
4027 && TREE_CODE (*node) != FIELD_DECL
4028 && TREE_CODE (*node) != TYPE_DECL)
4030 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4031 IDENTIFIER_POINTER (name));
4032 *no_add_attrs = true;
4036 /* Can combine regparm with all attributes but fastcall. */
4037 if (is_attribute_p ("regparm", name))
4041 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4043 error ("fastcall and regparm attributes are not compatible");
4046 cst = TREE_VALUE (args);
4047 if (TREE_CODE (cst) != INTEGER_CST)
4049 warning (OPT_Wattributes,
4050 "%qs attribute requires an integer constant argument",
4051 IDENTIFIER_POINTER (name));
4052 *no_add_attrs = true;
4054 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4056 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4057 IDENTIFIER_POINTER (name), REGPARM_MAX);
4058 *no_add_attrs = true;
4062 && lookup_attribute (ix86_force_align_arg_pointer_string,
4063 TYPE_ATTRIBUTES (*node))
4064 && compare_tree_int (cst, REGPARM_MAX-1))
4066 error ("%s functions limited to %d register parameters",
4067 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
4075 /* Do not warn when emulating the MS ABI. */
4076 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4077 warning (OPT_Wattributes, "%qs attribute ignored",
4078 IDENTIFIER_POINTER (name));
4079 *no_add_attrs = true;
4083 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4084 if (is_attribute_p ("fastcall", name))
4086 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4088 error ("fastcall and cdecl attributes are not compatible");
4090 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4092 error ("fastcall and stdcall attributes are not compatible");
4094 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4096 error ("fastcall and regparm attributes are not compatible");
4100 /* Can combine stdcall with fastcall (redundant), regparm and
4102 else if (is_attribute_p ("stdcall", name))
4104 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4106 error ("stdcall and cdecl attributes are not compatible");
4108 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4110 error ("stdcall and fastcall attributes are not compatible");
4114 /* Can combine cdecl with regparm and sseregparm. */
4115 else if (is_attribute_p ("cdecl", name))
4117 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4119 error ("stdcall and cdecl attributes are not compatible");
4121 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4123 error ("fastcall and cdecl attributes are not compatible");
4127 /* Can combine sseregparm with all attributes. */
4132 /* Return 0 if the attributes for two types are incompatible, 1 if they
4133 are compatible, and 2 if they are nearly compatible (which causes a
4134 warning to be generated). */
4137 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4139 /* Check for mismatch of non-default calling convention. */
4140 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4142 if (TREE_CODE (type1) != FUNCTION_TYPE
4143 && TREE_CODE (type1) != METHOD_TYPE)
4146 /* Check for mismatched fastcall/regparm types. */
4147 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4148 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4149 || (ix86_function_regparm (type1, NULL)
4150 != ix86_function_regparm (type2, NULL)))
4153 /* Check for mismatched sseregparm types. */
4154 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4155 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4158 /* Check for mismatched return types (cdecl vs stdcall). */
4159 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4160 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4166 /* Return the regparm value for a function with the indicated TYPE and DECL.
4167 DECL may be NULL when calling function indirectly
4168 or considering a libcall. */
4171 ix86_function_regparm (const_tree type, const_tree decl)
4174 int regparm = ix86_regparm;
4176 static bool error_issued;
4180 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4182 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4185 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4189 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4191 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4193 /* We can't use regparm(3) for nested functions because
4194 these pass static chain pointer in %ecx register. */
4195 if (!error_issued && regparm == 3
4196 && decl_function_context (decl)
4197 && !DECL_NO_STATIC_CHAIN (decl))
4199 error ("nested functions are limited to 2 register parameters");
4200 error_issued = true;
4208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4211 /* Use register calling convention for local functions when possible. */
4212 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4213 && flag_unit_at_a_time && !profile_flag)
4215 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4216 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4219 int local_regparm, globals = 0, regno;
4222 /* Make sure no regparm register is taken by a
4223 fixed register variable. */
4224 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4225 if (fixed_regs[local_regparm])
4228 /* We can't use regparm(3) for nested functions as these use
4229 static chain pointer in third argument. */
4230 if (local_regparm == 3
4231 && (decl_function_context (decl)
4232 || ix86_force_align_arg_pointer)
4233 && !DECL_NO_STATIC_CHAIN (decl))
4236 /* If the function realigns its stackpointer, the prologue will
4237 clobber %ecx. If we've already generated code for the callee,
4238 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4239 scanning the attributes for the self-realigning property. */
4240 f = DECL_STRUCT_FUNCTION (decl);
4241 if (local_regparm == 3
4242 && (f ? !!f->machine->force_align_arg_pointer
4243 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
4244 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
4247 /* Each fixed register usage increases register pressure,
4248 so less registers should be used for argument passing.
4249 This functionality can be overriden by an explicit
4251 for (regno = 0; regno <= DI_REG; regno++)
4252 if (fixed_regs[regno])
4256 = globals < local_regparm ? local_regparm - globals : 0;
4258 if (local_regparm > regparm)
4259 regparm = local_regparm;
4266 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4267 DFmode (2) arguments in SSE registers for a function with the
4268 indicated TYPE and DECL. DECL may be NULL when calling function
4269 indirectly or considering a libcall. Otherwise return 0. */
4272 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4274 gcc_assert (!TARGET_64BIT);
4276 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4277 by the sseregparm attribute. */
4278 if (TARGET_SSEREGPARM
4279 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4286 error ("Calling %qD with attribute sseregparm without "
4287 "SSE/SSE2 enabled", decl);
4289 error ("Calling %qT with attribute sseregparm without "
4290 "SSE/SSE2 enabled", type);
4298 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4299 (and DFmode for SSE2) arguments in SSE registers. */
4300 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
4302 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4303 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4305 return TARGET_SSE2 ? 2 : 1;
4311 /* Return true if EAX is live at the start of the function. Used by
4312 ix86_expand_prologue to determine if we need special help before
4313 calling allocate_stack_worker. */
4316 ix86_eax_live_at_start_p (void)
4318 /* Cheat. Don't bother working forward from ix86_function_regparm
4319 to the function type to whether an actual argument is located in
4320 eax. Instead just look at cfg info, which is still close enough
4321 to correct at this point. This gives false positives for broken
4322 functions that might use uninitialized data that happens to be
4323 allocated in eax, but who cares? */
4324 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4327 /* Value is the number of bytes of arguments automatically
4328 popped when returning from a subroutine call.
4329 FUNDECL is the declaration node of the function (as a tree),
4330 FUNTYPE is the data type of the function (as a tree),
4331 or for a library call it is an identifier node for the subroutine name.
4332 SIZE is the number of bytes of arguments passed on the stack.
4334 On the 80386, the RTD insn may be used to pop them if the number
4335 of args is fixed, but if the number is variable then the caller
4336 must pop them all. RTD can't be used for library calls now
4337 because the library is compiled with the Unix compiler.
4338 Use of RTD is a selectable option, since it is incompatible with
4339 standard Unix calling sequences. If the option is not selected,
4340 the caller must always pop the args.
4342 The attribute stdcall is equivalent to RTD on a per module basis. */
4345 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4349 /* None of the 64-bit ABIs pop arguments. */
4353 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4355 /* Cdecl functions override -mrtd, and never pop the stack. */
4356 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4358 /* Stdcall and fastcall functions will pop the stack if not
4360 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4361 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4364 if (rtd && ! stdarg_p (funtype))
4368 /* Lose any fake structure return argument if it is passed on the stack. */
4369 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4370 && !KEEP_AGGREGATE_RETURN_POINTER)
4372 int nregs = ix86_function_regparm (funtype, fundecl);
4374 return GET_MODE_SIZE (Pmode);
4380 /* Argument support functions. */
4382 /* Return true when register may be used to pass function parameters. */
4384 ix86_function_arg_regno_p (int regno)
4387 const int *parm_regs;
4392 return (regno < REGPARM_MAX
4393 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4395 return (regno < REGPARM_MAX
4396 || (TARGET_MMX && MMX_REGNO_P (regno)
4397 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4398 || (TARGET_SSE && SSE_REGNO_P (regno)
4399 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4404 if (SSE_REGNO_P (regno) && TARGET_SSE)
4409 if (TARGET_SSE && SSE_REGNO_P (regno)
4410 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4414 /* TODO: The function should depend on current function ABI but
4415 builtins.c would need updating then. Therefore we use the
4418 /* RAX is used as hidden argument to va_arg functions. */
4419 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4422 if (DEFAULT_ABI == MS_ABI)
4423 parm_regs = x86_64_ms_abi_int_parameter_registers;
4425 parm_regs = x86_64_int_parameter_registers;
4426 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4427 : X86_64_REGPARM_MAX); i++)
4428 if (regno == parm_regs[i])
4433 /* Return if we do not know how to pass TYPE solely in registers. */
4436 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4438 if (must_pass_in_stack_var_size_or_pad (mode, type))
4441 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4442 The layout_type routine is crafty and tries to trick us into passing
4443 currently unsupported vector types on the stack by using TImode. */
4444 return (!TARGET_64BIT && mode == TImode
4445 && type && TREE_CODE (type) != VECTOR_TYPE);
4448 /* It returns the size, in bytes, of the area reserved for arguments passed
4449 in registers for the function represented by fndecl dependent to the used
4452 ix86_reg_parm_stack_space (const_tree fndecl)
4455 /* For libcalls it is possible that there is no fndecl at hand.
4456 Therefore assume for this case the default abi of the target. */
4458 call_abi = DEFAULT_ABI;
4460 call_abi = ix86_function_abi (fndecl);
4466 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4469 ix86_function_type_abi (const_tree fntype)
4471 if (TARGET_64BIT && fntype != NULL)
4474 if (DEFAULT_ABI == SYSV_ABI)
4475 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4477 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4485 ix86_function_abi (const_tree fndecl)
4489 return ix86_function_type_abi (TREE_TYPE (fndecl));
4492 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4495 ix86_cfun_abi (void)
4497 if (! cfun || ! TARGET_64BIT)
4499 return cfun->machine->call_abi;
4503 extern void init_regs (void);
4505 /* Implementation of call abi switching target hook. Specific to FNDECL
4506 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4508 To prevent redudant calls of costy function init_regs (), it checks not to
4509 reset register usage for default abi. */
4511 ix86_call_abi_override (const_tree fndecl)
4513 if (fndecl == NULL_TREE)
4514 cfun->machine->call_abi = DEFAULT_ABI;
4516 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4517 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4519 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4521 call_used_regs[4 /*RSI*/] = 0;
4522 call_used_regs[5 /*RDI*/] = 0;
4526 else if (TARGET_64BIT)
4528 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4530 call_used_regs[4 /*RSI*/] = 1;
4531 call_used_regs[5 /*RDI*/] = 1;
4537 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4538 for a call to a function whose data type is FNTYPE.
4539 For a library call, FNTYPE is 0. */
4542 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4543 tree fntype, /* tree ptr for function decl */
4544 rtx libname, /* SYMBOL_REF of library name or 0 */
4547 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4548 memset (cum, 0, sizeof (*cum));
4550 cum->call_abi = ix86_function_type_abi (fntype);
4551 /* Set up the number of registers to use for passing arguments. */
4552 cum->nregs = ix86_regparm;
4555 if (cum->call_abi != DEFAULT_ABI)
4556 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4561 cum->sse_nregs = SSE_REGPARM_MAX;
4564 if (cum->call_abi != DEFAULT_ABI)
4565 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4566 : X64_SSE_REGPARM_MAX;
4570 cum->mmx_nregs = MMX_REGPARM_MAX;
4571 cum->warn_sse = true;
4572 cum->warn_mmx = true;
4574 /* Because type might mismatch in between caller and callee, we need to
4575 use actual type of function for local calls.
4576 FIXME: cgraph_analyze can be told to actually record if function uses
4577 va_start so for local functions maybe_vaarg can be made aggressive
4579 FIXME: once typesytem is fixed, we won't need this code anymore. */
4581 fntype = TREE_TYPE (fndecl);
4582 cum->maybe_vaarg = (fntype
4583 ? (!prototype_p (fntype) || stdarg_p (fntype))
4588 /* If there are variable arguments, then we won't pass anything
4589 in registers in 32-bit mode. */
4590 if (stdarg_p (fntype))
4600 /* Use ecx and edx registers if function has fastcall attribute,
4601 else look for regparm information. */
4604 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4610 cum->nregs = ix86_function_regparm (fntype, fndecl);
4613 /* Set up the number of SSE registers used for passing SFmode
4614 and DFmode arguments. Warn for mismatching ABI. */
4615 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4619 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4620 But in the case of vector types, it is some vector mode.
4622 When we have only some of our vector isa extensions enabled, then there
4623 are some modes for which vector_mode_supported_p is false. For these
4624 modes, the generic vector support in gcc will choose some non-vector mode
4625 in order to implement the type. By computing the natural mode, we'll
4626 select the proper ABI location for the operand and not depend on whatever
4627 the middle-end decides to do with these vector types. */
4629 static enum machine_mode
4630 type_natural_mode (const_tree type)
4632 enum machine_mode mode = TYPE_MODE (type);
4634 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4636 HOST_WIDE_INT size = int_size_in_bytes (type);
4637 if ((size == 8 || size == 16)
4638 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4639 && TYPE_VECTOR_SUBPARTS (type) > 1)
4641 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4643 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4644 mode = MIN_MODE_VECTOR_FLOAT;
4646 mode = MIN_MODE_VECTOR_INT;
4648 /* Get the mode which has this inner mode and number of units. */
4649 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4650 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4651 && GET_MODE_INNER (mode) == innermode)
4661 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4662 this may not agree with the mode that the type system has chosen for the
4663 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4664 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4667 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4672 if (orig_mode != BLKmode)
4673 tmp = gen_rtx_REG (orig_mode, regno);
4676 tmp = gen_rtx_REG (mode, regno);
4677 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4678 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4684 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4685 of this code is to classify each 8bytes of incoming argument by the register
4686 class and assign registers accordingly. */
4688 /* Return the union class of CLASS1 and CLASS2.
4689 See the x86-64 PS ABI for details. */
4691 static enum x86_64_reg_class
4692 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4694 /* Rule #1: If both classes are equal, this is the resulting class. */
4695 if (class1 == class2)
4698 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4700 if (class1 == X86_64_NO_CLASS)
4702 if (class2 == X86_64_NO_CLASS)
4705 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4706 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4707 return X86_64_MEMORY_CLASS;
4709 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4710 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4711 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4712 return X86_64_INTEGERSI_CLASS;
4713 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4714 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4715 return X86_64_INTEGER_CLASS;
4717 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4719 if (class1 == X86_64_X87_CLASS
4720 || class1 == X86_64_X87UP_CLASS
4721 || class1 == X86_64_COMPLEX_X87_CLASS
4722 || class2 == X86_64_X87_CLASS
4723 || class2 == X86_64_X87UP_CLASS
4724 || class2 == X86_64_COMPLEX_X87_CLASS)
4725 return X86_64_MEMORY_CLASS;
4727 /* Rule #6: Otherwise class SSE is used. */
4728 return X86_64_SSE_CLASS;
4731 /* Classify the argument of type TYPE and mode MODE.
4732 CLASSES will be filled by the register class used to pass each word
4733 of the operand. The number of words is returned. In case the parameter
4734 should be passed in memory, 0 is returned. As a special case for zero
4735 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4737 BIT_OFFSET is used internally for handling records and specifies offset
4738 of the offset in bits modulo 256 to avoid overflow cases.
4740 See the x86-64 PS ABI for details.
4744 classify_argument (enum machine_mode mode, const_tree type,
4745 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4747 HOST_WIDE_INT bytes =
4748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4749 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4751 /* Variable sized entities are always passed/returned in memory. */
4755 if (mode != VOIDmode
4756 && targetm.calls.must_pass_in_stack (mode, type))
4759 if (type && AGGREGATE_TYPE_P (type))
4763 enum x86_64_reg_class subclasses[MAX_CLASSES];
4765 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4769 for (i = 0; i < words; i++)
4770 classes[i] = X86_64_NO_CLASS;
4772 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4773 signalize memory class, so handle it as special case. */
4776 classes[0] = X86_64_NO_CLASS;
4780 /* Classify each field of record and merge classes. */
4781 switch (TREE_CODE (type))
4784 /* And now merge the fields of structure. */
4785 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4787 if (TREE_CODE (field) == FIELD_DECL)
4791 if (TREE_TYPE (field) == error_mark_node)
4794 /* Bitfields are always classified as integer. Handle them
4795 early, since later code would consider them to be
4796 misaligned integers. */
4797 if (DECL_BIT_FIELD (field))
4799 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4800 i < ((int_bit_position (field) + (bit_offset % 64))
4801 + tree_low_cst (DECL_SIZE (field), 0)
4804 merge_classes (X86_64_INTEGER_CLASS,
4809 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4810 TREE_TYPE (field), subclasses,
4811 (int_bit_position (field)
4812 + bit_offset) % 256);
4815 for (i = 0; i < num; i++)
4818 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4820 merge_classes (subclasses[i], classes[i + pos]);
4828 /* Arrays are handled as small records. */
4831 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4832 TREE_TYPE (type), subclasses, bit_offset);
4836 /* The partial classes are now full classes. */
4837 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4838 subclasses[0] = X86_64_SSE_CLASS;
4839 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4840 subclasses[0] = X86_64_INTEGER_CLASS;
4842 for (i = 0; i < words; i++)
4843 classes[i] = subclasses[i % num];
4848 case QUAL_UNION_TYPE:
4849 /* Unions are similar to RECORD_TYPE but offset is always 0.
4851 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4853 if (TREE_CODE (field) == FIELD_DECL)
4857 if (TREE_TYPE (field) == error_mark_node)
4860 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4861 TREE_TYPE (field), subclasses,
4865 for (i = 0; i < num; i++)
4866 classes[i] = merge_classes (subclasses[i], classes[i]);
4875 /* Final merger cleanup. */
4876 for (i = 0; i < words; i++)
4878 /* If one class is MEMORY, everything should be passed in
4880 if (classes[i] == X86_64_MEMORY_CLASS)
4883 /* The X86_64_SSEUP_CLASS should be always preceded by
4884 X86_64_SSE_CLASS. */
4885 if (classes[i] == X86_64_SSEUP_CLASS
4886 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4887 classes[i] = X86_64_SSE_CLASS;
4889 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4890 if (classes[i] == X86_64_X87UP_CLASS
4891 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4892 classes[i] = X86_64_SSE_CLASS;
4897 /* Compute alignment needed. We align all types to natural boundaries with
4898 exception of XFmode that is aligned to 64bits. */
4899 if (mode != VOIDmode && mode != BLKmode)
4901 int mode_alignment = GET_MODE_BITSIZE (mode);
4904 mode_alignment = 128;
4905 else if (mode == XCmode)
4906 mode_alignment = 256;
4907 if (COMPLEX_MODE_P (mode))
4908 mode_alignment /= 2;
4909 /* Misaligned fields are always returned in memory. */
4910 if (bit_offset % mode_alignment)
4914 /* for V1xx modes, just use the base mode */
4915 if (VECTOR_MODE_P (mode) && mode != V1DImode
4916 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4917 mode = GET_MODE_INNER (mode);
4919 /* Classification of atomic types. */
4924 classes[0] = X86_64_SSE_CLASS;
4927 classes[0] = X86_64_SSE_CLASS;
4928 classes[1] = X86_64_SSEUP_CLASS;
4937 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4938 classes[0] = X86_64_INTEGERSI_CLASS;
4940 classes[0] = X86_64_INTEGER_CLASS;
4944 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4949 if (!(bit_offset % 64))
4950 classes[0] = X86_64_SSESF_CLASS;
4952 classes[0] = X86_64_SSE_CLASS;
4955 classes[0] = X86_64_SSEDF_CLASS;
4958 classes[0] = X86_64_X87_CLASS;
4959 classes[1] = X86_64_X87UP_CLASS;
4962 classes[0] = X86_64_SSE_CLASS;
4963 classes[1] = X86_64_SSEUP_CLASS;
4966 classes[0] = X86_64_SSE_CLASS;
4969 classes[0] = X86_64_SSEDF_CLASS;
4970 classes[1] = X86_64_SSEDF_CLASS;
4973 classes[0] = X86_64_COMPLEX_X87_CLASS;
4976 /* This modes is larger than 16 bytes. */
4984 classes[0] = X86_64_SSE_CLASS;
4985 classes[1] = X86_64_SSEUP_CLASS;
4992 classes[0] = X86_64_SSE_CLASS;
4998 gcc_assert (VECTOR_MODE_P (mode));
5003 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5005 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5006 classes[0] = X86_64_INTEGERSI_CLASS;
5008 classes[0] = X86_64_INTEGER_CLASS;
5009 classes[1] = X86_64_INTEGER_CLASS;
5010 return 1 + (bytes > 8);
5014 /* Examine the argument and return set number of register required in each
5015 class. Return 0 iff parameter should be passed in memory. */
5017 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5018 int *int_nregs, int *sse_nregs)
5020 enum x86_64_reg_class regclass[MAX_CLASSES];
5021 int n = classify_argument (mode, type, regclass, 0);
5027 for (n--; n >= 0; n--)
5028 switch (regclass[n])
5030 case X86_64_INTEGER_CLASS:
5031 case X86_64_INTEGERSI_CLASS:
5034 case X86_64_SSE_CLASS:
5035 case X86_64_SSESF_CLASS:
5036 case X86_64_SSEDF_CLASS:
5039 case X86_64_NO_CLASS:
5040 case X86_64_SSEUP_CLASS:
5042 case X86_64_X87_CLASS:
5043 case X86_64_X87UP_CLASS:
5047 case X86_64_COMPLEX_X87_CLASS:
5048 return in_return ? 2 : 0;
5049 case X86_64_MEMORY_CLASS:
5055 /* Construct container for the argument used by GCC interface. See
5056 FUNCTION_ARG for the detailed description. */
5059 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5060 const_tree type, int in_return, int nintregs, int nsseregs,
5061 const int *intreg, int sse_regno)
5063 /* The following variables hold the static issued_error state. */
5064 static bool issued_sse_arg_error;
5065 static bool issued_sse_ret_error;
5066 static bool issued_x87_ret_error;
5068 enum machine_mode tmpmode;
5070 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5071 enum x86_64_reg_class regclass[MAX_CLASSES];
5075 int needed_sseregs, needed_intregs;
5076 rtx exp[MAX_CLASSES];
5079 n = classify_argument (mode, type, regclass, 0);
5082 if (!examine_argument (mode, type, in_return, &needed_intregs,
5085 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5088 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5089 some less clueful developer tries to use floating-point anyway. */
5090 if (needed_sseregs && !TARGET_SSE)
5094 if (!issued_sse_ret_error)
5096 error ("SSE register return with SSE disabled");
5097 issued_sse_ret_error = true;
5100 else if (!issued_sse_arg_error)
5102 error ("SSE register argument with SSE disabled");
5103 issued_sse_arg_error = true;
5108 /* Likewise, error if the ABI requires us to return values in the
5109 x87 registers and the user specified -mno-80387. */
5110 if (!TARGET_80387 && in_return)
5111 for (i = 0; i < n; i++)
5112 if (regclass[i] == X86_64_X87_CLASS
5113 || regclass[i] == X86_64_X87UP_CLASS
5114 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5116 if (!issued_x87_ret_error)
5118 error ("x87 register return with x87 disabled");
5119 issued_x87_ret_error = true;
5124 /* First construct simple cases. Avoid SCmode, since we want to use
5125 single register to pass this type. */
5126 if (n == 1 && mode != SCmode)
5127 switch (regclass[0])
5129 case X86_64_INTEGER_CLASS:
5130 case X86_64_INTEGERSI_CLASS:
5131 return gen_rtx_REG (mode, intreg[0]);
5132 case X86_64_SSE_CLASS:
5133 case X86_64_SSESF_CLASS:
5134 case X86_64_SSEDF_CLASS:
5135 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5136 case X86_64_X87_CLASS:
5137 case X86_64_COMPLEX_X87_CLASS:
5138 return gen_rtx_REG (mode, FIRST_STACK_REG);
5139 case X86_64_NO_CLASS:
5140 /* Zero sized array, struct or class. */
5145 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5146 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5147 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5150 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5151 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5152 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5153 && regclass[1] == X86_64_INTEGER_CLASS
5154 && (mode == CDImode || mode == TImode || mode == TFmode)
5155 && intreg[0] + 1 == intreg[1])
5156 return gen_rtx_REG (mode, intreg[0]);
5158 /* Otherwise figure out the entries of the PARALLEL. */
5159 for (i = 0; i < n; i++)
5161 switch (regclass[i])
5163 case X86_64_NO_CLASS:
5165 case X86_64_INTEGER_CLASS:
5166 case X86_64_INTEGERSI_CLASS:
5167 /* Merge TImodes on aligned occasions here too. */
5168 if (i * 8 + 8 > bytes)
5169 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5170 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5174 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5175 if (tmpmode == BLKmode)
5177 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5178 gen_rtx_REG (tmpmode, *intreg),
5182 case X86_64_SSESF_CLASS:
5183 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5184 gen_rtx_REG (SFmode,
5185 SSE_REGNO (sse_regno)),
5189 case X86_64_SSEDF_CLASS:
5190 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5191 gen_rtx_REG (DFmode,
5192 SSE_REGNO (sse_regno)),
5196 case X86_64_SSE_CLASS:
5197 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5201 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5202 gen_rtx_REG (tmpmode,
5203 SSE_REGNO (sse_regno)),
5205 if (tmpmode == TImode)
5214 /* Empty aligned struct, union or class. */
5218 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5219 for (i = 0; i < nexps; i++)
5220 XVECEXP (ret, 0, i) = exp [i];
5224 /* Update the data in CUM to advance over an argument of mode MODE
5225 and data type TYPE. (TYPE is null for libcalls where that information
5226 may not be available.) */
5229 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5230 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5246 cum->words += words;
5247 cum->nregs -= words;
5248 cum->regno += words;
5250 if (cum->nregs <= 0)
5258 if (cum->float_in_sse < 2)
5261 if (cum->float_in_sse < 1)
5272 if (!type || !AGGREGATE_TYPE_P (type))
5274 cum->sse_words += words;
5275 cum->sse_nregs -= 1;
5276 cum->sse_regno += 1;
5277 if (cum->sse_nregs <= 0)
5290 if (!type || !AGGREGATE_TYPE_P (type))
5292 cum->mmx_words += words;
5293 cum->mmx_nregs -= 1;
5294 cum->mmx_regno += 1;
5295 if (cum->mmx_nregs <= 0)
5306 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5307 tree type, HOST_WIDE_INT words)
5309 int int_nregs, sse_nregs;
5311 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5312 cum->words += words;
5313 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5315 cum->nregs -= int_nregs;
5316 cum->sse_nregs -= sse_nregs;
5317 cum->regno += int_nregs;
5318 cum->sse_regno += sse_nregs;
5321 cum->words += words;
5325 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5326 HOST_WIDE_INT words)
5328 /* Otherwise, this should be passed indirect. */
5329 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5331 cum->words += words;
5340 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5341 tree type, int named ATTRIBUTE_UNUSED)
5343 HOST_WIDE_INT bytes, words;
5345 if (mode == BLKmode)
5346 bytes = int_size_in_bytes (type);
5348 bytes = GET_MODE_SIZE (mode);
5349 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5352 mode = type_natural_mode (type);
5354 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5355 function_arg_advance_ms_64 (cum, bytes, words);
5356 else if (TARGET_64BIT)
5357 function_arg_advance_64 (cum, mode, type, words);
5359 function_arg_advance_32 (cum, mode, type, bytes, words);
5362 /* Define where to put the arguments to a function.
5363 Value is zero to push the argument on the stack,
5364 or a hard register in which to store the argument.
5366 MODE is the argument's machine mode.
5367 TYPE is the data type of the argument (as a tree).
5368 This is null for libcalls where that information may
5370 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5371 the preceding args and about the function being called.
5372 NAMED is nonzero if this argument is a named parameter
5373 (otherwise it is an extra parameter matching an ellipsis). */
5376 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5377 enum machine_mode orig_mode, tree type,
5378 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5380 static bool warnedsse, warnedmmx;
5382 /* Avoid the AL settings for the Unix64 ABI. */
5383 if (mode == VOIDmode)
5399 if (words <= cum->nregs)
5401 int regno = cum->regno;
5403 /* Fastcall allocates the first two DWORD (SImode) or
5404 smaller arguments to ECX and EDX if it isn't an
5410 || (type && AGGREGATE_TYPE_P (type)))
5413 /* ECX not EAX is the first allocated register. */
5414 if (regno == AX_REG)
5417 return gen_rtx_REG (mode, regno);
5422 if (cum->float_in_sse < 2)
5425 if (cum->float_in_sse < 1)
5435 if (!type || !AGGREGATE_TYPE_P (type))
5437 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5440 warning (0, "SSE vector argument without SSE enabled "
5444 return gen_reg_or_parallel (mode, orig_mode,
5445 cum->sse_regno + FIRST_SSE_REG);
5454 if (!type || !AGGREGATE_TYPE_P (type))
5456 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5459 warning (0, "MMX vector argument without MMX enabled "
5463 return gen_reg_or_parallel (mode, orig_mode,
5464 cum->mmx_regno + FIRST_MMX_REG);
5473 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5474 enum machine_mode orig_mode, tree type)
5476 /* Handle a hidden AL argument containing number of registers
5477 for varargs x86-64 functions. */
5478 if (mode == VOIDmode)
5479 return GEN_INT (cum->maybe_vaarg
5480 ? (cum->sse_nregs < 0
5481 ? (cum->call_abi == DEFAULT_ABI
5483 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5484 : X64_SSE_REGPARM_MAX))
5488 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5490 &x86_64_int_parameter_registers [cum->regno],
5495 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5496 enum machine_mode orig_mode, int named,
5497 HOST_WIDE_INT bytes)
5501 /* Avoid the AL settings for the Unix64 ABI. */
5502 if (mode == VOIDmode)
5505 /* If we've run out of registers, it goes on the stack. */
5506 if (cum->nregs == 0)
5509 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5511 /* Only floating point modes are passed in anything but integer regs. */
5512 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5515 regno = cum->regno + FIRST_SSE_REG;
5520 /* Unnamed floating parameters are passed in both the
5521 SSE and integer registers. */
5522 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5523 t2 = gen_rtx_REG (mode, regno);
5524 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5525 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5526 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5529 /* Handle aggregated types passed in register. */
5530 if (orig_mode == BLKmode)
5532 if (bytes > 0 && bytes <= 8)
5533 mode = (bytes > 4 ? DImode : SImode);
5534 if (mode == BLKmode)
5538 return gen_reg_or_parallel (mode, orig_mode, regno);
5542 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5543 tree type, int named)
5545 enum machine_mode mode = omode;
5546 HOST_WIDE_INT bytes, words;
5548 if (mode == BLKmode)
5549 bytes = int_size_in_bytes (type);
5551 bytes = GET_MODE_SIZE (mode);
5552 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5554 /* To simplify the code below, represent vector types with a vector mode
5555 even if MMX/SSE are not active. */
5556 if (type && TREE_CODE (type) == VECTOR_TYPE)
5557 mode = type_natural_mode (type);
5559 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5560 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5561 else if (TARGET_64BIT)
5562 return function_arg_64 (cum, mode, omode, type);
5564 return function_arg_32 (cum, mode, omode, type, bytes, words);
5567 /* A C expression that indicates when an argument must be passed by
5568 reference. If nonzero for an argument, a copy of that argument is
5569 made in memory and a pointer to the argument is passed instead of
5570 the argument itself. The pointer is passed in whatever way is
5571 appropriate for passing a pointer to that type. */
5574 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5575 enum machine_mode mode ATTRIBUTE_UNUSED,
5576 const_tree type, bool named ATTRIBUTE_UNUSED)
5578 /* See Windows x64 Software Convention. */
5579 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5581 int msize = (int) GET_MODE_SIZE (mode);
5584 /* Arrays are passed by reference. */
5585 if (TREE_CODE (type) == ARRAY_TYPE)
5588 if (AGGREGATE_TYPE_P (type))
5590 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5591 are passed by reference. */
5592 msize = int_size_in_bytes (type);
5596 /* __m128 is passed by reference. */
5598 case 1: case 2: case 4: case 8:
5604 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5610 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
5613 contains_aligned_value_p (tree type)
5615 enum machine_mode mode = TYPE_MODE (type);
5616 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5620 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5622 if (TYPE_ALIGN (type) < 128)
5625 if (AGGREGATE_TYPE_P (type))
5627 /* Walk the aggregates recursively. */
5628 switch (TREE_CODE (type))
5632 case QUAL_UNION_TYPE:
5636 /* Walk all the structure fields. */
5637 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5639 if (TREE_CODE (field) == FIELD_DECL
5640 && contains_aligned_value_p (TREE_TYPE (field)))
5647 /* Just for use if some languages passes arrays by value. */
5648 if (contains_aligned_value_p (TREE_TYPE (type)))
5659 /* Gives the alignment boundary, in bits, of an argument with the
5660 specified mode and type. */
5663 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5668 /* Since canonical type is used for call, we convert it to
5669 canonical type if needed. */
5670 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5671 type = TYPE_CANONICAL (type);
5672 align = TYPE_ALIGN (type);
5675 align = GET_MODE_ALIGNMENT (mode);
5676 if (align < PARM_BOUNDARY)
5677 align = PARM_BOUNDARY;
5678 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5679 natural boundaries. */
5680 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5682 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5683 make an exception for SSE modes since these require 128bit
5686 The handling here differs from field_alignment. ICC aligns MMX
5687 arguments to 4 byte boundaries, while structure fields are aligned
5688 to 8 byte boundaries. */
5691 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5692 align = PARM_BOUNDARY;
5696 if (!contains_aligned_value_p (type))
5697 align = PARM_BOUNDARY;
5700 if (align > BIGGEST_ALIGNMENT)
5701 align = BIGGEST_ALIGNMENT;
5705 /* Return true if N is a possible register number of function value. */
5708 ix86_function_value_regno_p (int regno)
5715 case FIRST_FLOAT_REG:
5716 /* TODO: The function should depend on current function ABI but
5717 builtins.c would need updating then. Therefore we use the
5719 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5721 return TARGET_FLOAT_RETURNS_IN_80387;
5727 if (TARGET_MACHO || TARGET_64BIT)
5735 /* Define how to find the value returned by a function.
5736 VALTYPE is the data type of the value (as a tree).
5737 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5738 otherwise, FUNC is 0. */
5741 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5742 const_tree fntype, const_tree fn)
5746 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5747 we normally prevent this case when mmx is not available. However
5748 some ABIs may require the result to be returned like DImode. */
5749 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5750 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5752 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5753 we prevent this case when sse is not available. However some ABIs
5754 may require the result to be returned like integer TImode. */
5755 else if (mode == TImode
5756 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5757 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5759 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5760 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5761 regno = FIRST_FLOAT_REG;
5763 /* Most things go in %eax. */
5766 /* Override FP return register with %xmm0 for local functions when
5767 SSE math is enabled or for functions with sseregparm attribute. */
5768 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5770 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5771 if ((sse_level >= 1 && mode == SFmode)
5772 || (sse_level == 2 && mode == DFmode))
5773 regno = FIRST_SSE_REG;
5776 return gen_rtx_REG (orig_mode, regno);
5780 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5785 /* Handle libcalls, which don't provide a type node. */
5786 if (valtype == NULL)
5798 return gen_rtx_REG (mode, FIRST_SSE_REG);
5801 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5805 return gen_rtx_REG (mode, AX_REG);
5809 ret = construct_container (mode, orig_mode, valtype, 1,
5810 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5811 x86_64_int_return_registers, 0);
5813 /* For zero sized structures, construct_container returns NULL, but we
5814 need to keep rest of compiler happy by returning meaningful value. */
5816 ret = gen_rtx_REG (orig_mode, AX_REG);
5822 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5824 unsigned int regno = AX_REG;
5828 switch (GET_MODE_SIZE (mode))
5831 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5832 && !COMPLEX_MODE_P (mode))
5833 regno = FIRST_SSE_REG;
5837 if (mode == SFmode || mode == DFmode)
5838 regno = FIRST_SSE_REG;
5844 return gen_rtx_REG (orig_mode, regno);
5848 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
5849 enum machine_mode orig_mode, enum machine_mode mode)
5851 const_tree fn, fntype;
5854 if (fntype_or_decl && DECL_P (fntype_or_decl))
5855 fn = fntype_or_decl;
5856 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
5858 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
5859 return function_value_ms_64 (orig_mode, mode);
5860 else if (TARGET_64BIT)
5861 return function_value_64 (orig_mode, mode, valtype);
5863 return function_value_32 (orig_mode, mode, fntype, fn);
5867 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
5868 bool outgoing ATTRIBUTE_UNUSED)
5870 enum machine_mode mode, orig_mode;
5872 orig_mode = TYPE_MODE (valtype);
5873 mode = type_natural_mode (valtype);
5874 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
5878 ix86_libcall_value (enum machine_mode mode)
5880 return ix86_function_value_1 (NULL, NULL, mode, mode);
5883 /* Return true iff type is returned in memory. */
5885 static int ATTRIBUTE_UNUSED
5886 return_in_memory_32 (const_tree type, enum machine_mode mode)
5890 if (mode == BLKmode)
5893 size = int_size_in_bytes (type);
5895 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5898 if (VECTOR_MODE_P (mode) || mode == TImode)
5900 /* User-created vectors small enough to fit in EAX. */
5904 /* MMX/3dNow values are returned in MM0,
5905 except when it doesn't exits. */
5907 return (TARGET_MMX ? 0 : 1);
5909 /* SSE values are returned in XMM0, except when it doesn't exist. */
5911 return (TARGET_SSE ? 0 : 1);
5922 static int ATTRIBUTE_UNUSED
5923 return_in_memory_64 (const_tree type, enum machine_mode mode)
5925 int needed_intregs, needed_sseregs;
5926 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5929 static int ATTRIBUTE_UNUSED
5930 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5932 HOST_WIDE_INT size = int_size_in_bytes (type);
5934 /* __m128 is returned in xmm0. */
5935 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5936 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5939 /* Otherwise, the size must be exactly in [1248]. */
5940 return (size != 1 && size != 2 && size != 4 && size != 8);
5944 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5946 #ifdef SUBTARGET_RETURN_IN_MEMORY
5947 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5949 const enum machine_mode mode = type_natural_mode (type);
5951 if (TARGET_64BIT_MS_ABI)
5952 return return_in_memory_ms_64 (type, mode);
5953 else if (TARGET_64BIT)
5954 return return_in_memory_64 (type, mode);
5956 return return_in_memory_32 (type, mode);
5960 /* Return false iff TYPE is returned in memory. This version is used
5961 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5962 but differs notably in that when MMX is available, 8-byte vectors
5963 are returned in memory, rather than in MMX registers. */
5966 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5969 enum machine_mode mode = type_natural_mode (type);
5972 return return_in_memory_64 (type, mode);
5974 if (mode == BLKmode)
5977 size = int_size_in_bytes (type);
5979 if (VECTOR_MODE_P (mode))
5981 /* Return in memory only if MMX registers *are* available. This
5982 seems backwards, but it is consistent with the existing
5989 else if (mode == TImode)
5991 else if (mode == XFmode)
5997 /* When returning SSE vector types, we have a choice of either
5998 (1) being abi incompatible with a -march switch, or
5999 (2) generating an error.
6000 Given no good solution, I think the safest thing is one warning.
6001 The user won't be able to use -Werror, but....
6003 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6004 called in response to actually generating a caller or callee that
6005 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6006 via aggregate_value_p for general type probing from tree-ssa. */
6009 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6011 static bool warnedsse, warnedmmx;
6013 if (!TARGET_64BIT && type)
6015 /* Look at the return type of the function, not the function type. */
6016 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6018 if (!TARGET_SSE && !warnedsse)
6021 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6024 warning (0, "SSE vector return without SSE enabled "
6029 if (!TARGET_MMX && !warnedmmx)
6031 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6034 warning (0, "MMX vector return without MMX enabled "
6044 /* Create the va_list data type. */
6046 /* Returns the calling convention specific va_list date type.
6047 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6050 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6052 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6054 /* For i386 we use plain pointer to argument area. */
6055 if (!TARGET_64BIT || abi == MS_ABI)
6056 return build_pointer_type (char_type_node);
6058 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6059 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6061 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6062 unsigned_type_node);
6063 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6064 unsigned_type_node);
6065 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6067 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6070 va_list_gpr_counter_field = f_gpr;
6071 va_list_fpr_counter_field = f_fpr;
6073 DECL_FIELD_CONTEXT (f_gpr) = record;
6074 DECL_FIELD_CONTEXT (f_fpr) = record;
6075 DECL_FIELD_CONTEXT (f_ovf) = record;
6076 DECL_FIELD_CONTEXT (f_sav) = record;
6078 TREE_CHAIN (record) = type_decl;
6079 TYPE_NAME (record) = type_decl;
6080 TYPE_FIELDS (record) = f_gpr;
6081 TREE_CHAIN (f_gpr) = f_fpr;
6082 TREE_CHAIN (f_fpr) = f_ovf;
6083 TREE_CHAIN (f_ovf) = f_sav;
6085 layout_type (record);
6087 /* The correct type is an array type of one element. */
6088 return build_array_type (record, build_index_type (size_zero_node));
6091 /* Setup the builtin va_list data type and for 64-bit the additional
6092 calling convention specific va_list data types. */
6095 ix86_build_builtin_va_list (void)
6097 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6099 /* Initialize abi specific va_list builtin types. */
6103 if (DEFAULT_ABI == MS_ABI)
6105 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6106 if (TREE_CODE (t) != RECORD_TYPE)
6107 t = build_variant_type_copy (t);
6108 sysv_va_list_type_node = t;
6113 if (TREE_CODE (t) != RECORD_TYPE)
6114 t = build_variant_type_copy (t);
6115 sysv_va_list_type_node = t;
6117 if (DEFAULT_ABI != MS_ABI)
6119 t = ix86_build_builtin_va_list_abi (MS_ABI);
6120 if (TREE_CODE (t) != RECORD_TYPE)
6121 t = build_variant_type_copy (t);
6122 ms_va_list_type_node = t;
6127 if (TREE_CODE (t) != RECORD_TYPE)
6128 t = build_variant_type_copy (t);
6129 ms_va_list_type_node = t;
6136 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6139 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6148 int regparm = ix86_regparm;
6150 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
6151 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6153 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
6156 /* Indicate to allocate space on the stack for varargs save area. */
6157 ix86_save_varrargs_registers = 1;
6158 /* We need 16-byte stack alignment to save SSE registers. If user
6159 asked for lower preferred_stack_boundary, lets just hope that he knows
6160 what he is doing and won't varargs SSE values.
6162 We also may end up assuming that only 64bit values are stored in SSE
6163 register let some floating point program work. */
6164 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
6165 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
6167 save_area = frame_pointer_rtx;
6168 set = get_varargs_alias_set ();
6170 for (i = cum->regno;
6172 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6175 mem = gen_rtx_MEM (Pmode,
6176 plus_constant (save_area, i * UNITS_PER_WORD));
6177 MEM_NOTRAP_P (mem) = 1;
6178 set_mem_alias_set (mem, set);
6179 emit_move_insn (mem, gen_rtx_REG (Pmode,
6180 x86_64_int_parameter_registers[i]));
6183 if (cum->sse_nregs && cfun->va_list_fpr_size)
6185 /* Now emit code to save SSE registers. The AX parameter contains number
6186 of SSE parameter registers used to call this function. We use
6187 sse_prologue_save insn template that produces computed jump across
6188 SSE saves. We need some preparation work to get this working. */
6190 label = gen_label_rtx ();
6191 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6193 /* Compute address to jump to :
6194 label - eax*4 + nnamed_sse_arguments*4 */
6195 tmp_reg = gen_reg_rtx (Pmode);
6196 nsse_reg = gen_reg_rtx (Pmode);
6197 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6198 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6199 gen_rtx_MULT (Pmode, nsse_reg,
6204 gen_rtx_CONST (DImode,
6205 gen_rtx_PLUS (DImode,
6207 GEN_INT (cum->sse_regno * 4))));
6209 emit_move_insn (nsse_reg, label_ref);
6210 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6212 /* Compute address of memory block we save into. We always use pointer
6213 pointing 127 bytes after first byte to store - this is needed to keep
6214 instruction size limited by 4 bytes. */
6215 tmp_reg = gen_reg_rtx (Pmode);
6216 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6217 plus_constant (save_area,
6218 8 * X86_64_REGPARM_MAX + 127)));
6219 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6220 MEM_NOTRAP_P (mem) = 1;
6221 set_mem_alias_set (mem, set);
6222 set_mem_align (mem, BITS_PER_WORD);
6224 /* And finally do the dirty job! */
6225 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6226 GEN_INT (cum->sse_regno), label));
6231 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6233 alias_set_type set = get_varargs_alias_set ();
6236 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6240 mem = gen_rtx_MEM (Pmode,
6241 plus_constant (virtual_incoming_args_rtx,
6242 i * UNITS_PER_WORD));
6243 MEM_NOTRAP_P (mem) = 1;
6244 set_mem_alias_set (mem, set);
6246 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6247 emit_move_insn (mem, reg);
6252 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6253 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6256 CUMULATIVE_ARGS next_cum;
6259 /* This argument doesn't appear to be used anymore. Which is good,
6260 because the old code here didn't suppress rtl generation. */
6261 gcc_assert (!no_rtl);
6266 fntype = TREE_TYPE (current_function_decl);
6268 /* For varargs, we do not want to skip the dummy va_dcl argument.
6269 For stdargs, we do want to skip the last named argument. */
6271 if (stdarg_p (fntype))
6272 function_arg_advance (&next_cum, mode, type, 1);
6274 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
6275 setup_incoming_varargs_ms_64 (&next_cum);
6277 setup_incoming_varargs_64 (&next_cum);
6280 /* Checks if TYPE is of kind va_list char *. */
6283 is_va_list_char_pointer (tree type)
6287 /* For 32-bit it is always true. */
6290 canonic = ix86_canonical_va_list_type (type);
6291 return (canonic == ms_va_list_type_node
6292 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6295 /* Implement va_start. */
6298 ix86_va_start (tree valist, rtx nextarg)
6300 HOST_WIDE_INT words, n_gpr, n_fpr;
6301 tree f_gpr, f_fpr, f_ovf, f_sav;
6302 tree gpr, fpr, ovf, sav, t;
6305 /* Only 64bit target needs something special. */
6306 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6308 std_expand_builtin_va_start (valist, nextarg);
6312 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6313 f_fpr = TREE_CHAIN (f_gpr);
6314 f_ovf = TREE_CHAIN (f_fpr);
6315 f_sav = TREE_CHAIN (f_ovf);
6317 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6318 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6319 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6320 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6321 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6323 /* Count number of gp and fp argument registers used. */
6324 words = crtl->args.info.words;
6325 n_gpr = crtl->args.info.regno;
6326 n_fpr = crtl->args.info.sse_regno;
6328 if (cfun->va_list_gpr_size)
6330 type = TREE_TYPE (gpr);
6331 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
6332 build_int_cst (type, n_gpr * 8));
6333 TREE_SIDE_EFFECTS (t) = 1;
6334 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6337 if (cfun->va_list_fpr_size)
6339 type = TREE_TYPE (fpr);
6340 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
6341 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6342 TREE_SIDE_EFFECTS (t) = 1;
6343 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6346 /* Find the overflow area. */
6347 type = TREE_TYPE (ovf);
6348 t = make_tree (type, virtual_incoming_args_rtx);
6350 t = build2 (POINTER_PLUS_EXPR, type, t,
6351 size_int (words * UNITS_PER_WORD));
6352 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
6353 TREE_SIDE_EFFECTS (t) = 1;
6354 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6356 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
6358 /* Find the register save area.
6359 Prologue of the function save it right above stack frame. */
6360 type = TREE_TYPE (sav);
6361 t = make_tree (type, frame_pointer_rtx);
6362 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
6363 TREE_SIDE_EFFECTS (t) = 1;
6364 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6368 /* Implement va_arg. */
6371 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
6373 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6374 tree f_gpr, f_fpr, f_ovf, f_sav;
6375 tree gpr, fpr, ovf, sav, t;
6377 tree lab_false, lab_over = NULL_TREE;
6382 enum machine_mode nat_mode;
6385 /* Only 64bit target needs something special. */
6386 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6387 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6389 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6390 f_fpr = TREE_CHAIN (f_gpr);
6391 f_ovf = TREE_CHAIN (f_fpr);
6392 f_sav = TREE_CHAIN (f_ovf);
6394 valist = build_va_arg_indirect_ref (valist);
6395 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6396 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6397 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6398 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6400 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6402 type = build_pointer_type (type);
6403 size = int_size_in_bytes (type);
6404 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6406 nat_mode = type_natural_mode (type);
6407 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
6408 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6411 /* Pull the value out of the saved registers. */
6413 addr = create_tmp_var (ptr_type_node, "addr");
6414 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6418 int needed_intregs, needed_sseregs;
6420 tree int_addr, sse_addr;
6422 lab_false = create_artificial_label ();
6423 lab_over = create_artificial_label ();
6425 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6427 need_temp = (!REG_P (container)
6428 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6429 || TYPE_ALIGN (type) > 128));
6431 /* In case we are passing structure, verify that it is consecutive block
6432 on the register save area. If not we need to do moves. */
6433 if (!need_temp && !REG_P (container))
6435 /* Verify that all registers are strictly consecutive */
6436 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6440 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6442 rtx slot = XVECEXP (container, 0, i);
6443 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6444 || INTVAL (XEXP (slot, 1)) != i * 16)
6452 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6454 rtx slot = XVECEXP (container, 0, i);
6455 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6456 || INTVAL (XEXP (slot, 1)) != i * 8)
6468 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6469 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6470 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6471 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6474 /* First ensure that we fit completely in registers. */
6477 t = build_int_cst (TREE_TYPE (gpr),
6478 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6479 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6480 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6481 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6482 gimplify_and_add (t, pre_p);
6486 t = build_int_cst (TREE_TYPE (fpr),
6487 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6488 + X86_64_REGPARM_MAX * 8);
6489 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6490 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6491 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6492 gimplify_and_add (t, pre_p);
6495 /* Compute index to start of area used for integer regs. */
6498 /* int_addr = gpr + sav; */
6499 t = fold_convert (sizetype, gpr);
6500 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6501 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
6502 gimplify_and_add (t, pre_p);
6506 /* sse_addr = fpr + sav; */
6507 t = fold_convert (sizetype, fpr);
6508 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6509 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
6510 gimplify_and_add (t, pre_p);
6515 tree temp = create_tmp_var (type, "va_arg_tmp");
6518 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6519 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
6520 gimplify_and_add (t, pre_p);
6522 for (i = 0; i < XVECLEN (container, 0); i++)
6524 rtx slot = XVECEXP (container, 0, i);
6525 rtx reg = XEXP (slot, 0);
6526 enum machine_mode mode = GET_MODE (reg);
6527 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6528 tree addr_type = build_pointer_type (piece_type);
6531 tree dest_addr, dest;
6533 if (SSE_REGNO_P (REGNO (reg)))
6535 src_addr = sse_addr;
6536 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6540 src_addr = int_addr;
6541 src_offset = REGNO (reg) * 8;
6543 src_addr = fold_convert (addr_type, src_addr);
6544 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6545 size_int (src_offset));
6546 src = build_va_arg_indirect_ref (src_addr);
6548 dest_addr = fold_convert (addr_type, addr);
6549 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6550 size_int (INTVAL (XEXP (slot, 1))));
6551 dest = build_va_arg_indirect_ref (dest_addr);
6553 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
6554 gimplify_and_add (t, pre_p);
6560 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6561 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6562 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
6563 gimplify_and_add (t, pre_p);
6567 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6568 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6569 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
6570 gimplify_and_add (t, pre_p);
6573 t = build1 (GOTO_EXPR, void_type_node, lab_over);
6574 gimplify_and_add (t, pre_p);
6576 t = build1 (LABEL_EXPR, void_type_node, lab_false);
6577 append_to_statement_list (t, pre_p);
6580 /* ... otherwise out of the overflow area. */
6582 /* When we align parameter on stack for caller, if the parameter
6583 alignment is beyond PREFERRED_STACK_BOUNDARY, it will be
6584 aligned at PREFERRED_STACK_BOUNDARY. We will match callee
6585 here with caller. */
6586 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6587 if ((unsigned int) arg_boundary > PREFERRED_STACK_BOUNDARY)
6588 arg_boundary = PREFERRED_STACK_BOUNDARY;
6590 /* Care for on-stack alignment if needed. */
6591 if (arg_boundary <= 64
6592 || integer_zerop (TYPE_SIZE (type)))
6596 HOST_WIDE_INT align = arg_boundary / 8;
6597 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6598 size_int (align - 1));
6599 t = fold_convert (sizetype, t);
6600 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6602 t = fold_convert (TREE_TYPE (ovf), t);
6604 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6606 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
6607 gimplify_and_add (t2, pre_p);
6609 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6610 size_int (rsize * UNITS_PER_WORD));
6611 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
6612 gimplify_and_add (t, pre_p);
6616 t = build1 (LABEL_EXPR, void_type_node, lab_over);
6617 append_to_statement_list (t, pre_p);
6620 ptrtype = build_pointer_type (type);
6621 addr = fold_convert (ptrtype, addr);
6624 addr = build_va_arg_indirect_ref (addr);
6625 return build_va_arg_indirect_ref (addr);
6628 /* Return nonzero if OPNUM's MEM should be matched
6629 in movabs* patterns. */
6632 ix86_check_movabs (rtx insn, int opnum)
6636 set = PATTERN (insn);
6637 if (GET_CODE (set) == PARALLEL)
6638 set = XVECEXP (set, 0, 0);
6639 gcc_assert (GET_CODE (set) == SET);
6640 mem = XEXP (set, opnum);
6641 while (GET_CODE (mem) == SUBREG)
6642 mem = SUBREG_REG (mem);
6643 gcc_assert (MEM_P (mem));
6644 return (volatile_ok || !MEM_VOLATILE_P (mem));
6647 /* Initialize the table of extra 80387 mathematical constants. */
6650 init_ext_80387_constants (void)
6652 static const char * cst[5] =
6654 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6655 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6656 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6657 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6658 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6662 for (i = 0; i < 5; i++)
6664 real_from_string (&ext_80387_constants_table[i], cst[i]);
6665 /* Ensure each constant is rounded to XFmode precision. */
6666 real_convert (&ext_80387_constants_table[i],
6667 XFmode, &ext_80387_constants_table[i]);
6670 ext_80387_constants_init = 1;
6673 /* Return true if the constant is something that can be loaded with
6674 a special instruction. */
6677 standard_80387_constant_p (rtx x)
6679 enum machine_mode mode = GET_MODE (x);
6683 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6686 if (x == CONST0_RTX (mode))
6688 if (x == CONST1_RTX (mode))
6691 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6693 /* For XFmode constants, try to find a special 80387 instruction when
6694 optimizing for size or on those CPUs that benefit from them. */
6696 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
6700 if (! ext_80387_constants_init)
6701 init_ext_80387_constants ();
6703 for (i = 0; i < 5; i++)
6704 if (real_identical (&r, &ext_80387_constants_table[i]))
6708 /* Load of the constant -0.0 or -1.0 will be split as
6709 fldz;fchs or fld1;fchs sequence. */
6710 if (real_isnegzero (&r))
6712 if (real_identical (&r, &dconstm1))
6718 /* Return the opcode of the special instruction to be used to load
6722 standard_80387_constant_opcode (rtx x)
6724 switch (standard_80387_constant_p (x))
6748 /* Return the CONST_DOUBLE representing the 80387 constant that is
6749 loaded by the specified special instruction. The argument IDX
6750 matches the return value from standard_80387_constant_p. */
6753 standard_80387_constant_rtx (int idx)
6757 if (! ext_80387_constants_init)
6758 init_ext_80387_constants ();
6774 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6778 /* Return 1 if mode is a valid mode for sse. */
6780 standard_sse_mode_p (enum machine_mode mode)
6797 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
6800 standard_sse_constant_p (rtx x)
6802 enum machine_mode mode = GET_MODE (x);
6804 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
6806 if (vector_all_ones_operand (x, mode)
6807 && standard_sse_mode_p (mode))
6808 return TARGET_SSE2 ? 2 : -1;
6813 /* Return the opcode of the special instruction to be used to load
6817 standard_sse_constant_opcode (rtx insn, rtx x)
6819 switch (standard_sse_constant_p (x))
6822 if (get_attr_mode (insn) == MODE_V4SF)
6823 return "xorps\t%0, %0";
6824 else if (get_attr_mode (insn) == MODE_V2DF)
6825 return "xorpd\t%0, %0";
6827 return "pxor\t%0, %0";
6829 return "pcmpeqd\t%0, %0";
6834 /* Returns 1 if OP contains a symbol reference */
6837 symbolic_reference_mentioned_p (rtx op)
6842 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
6845 fmt = GET_RTX_FORMAT (GET_CODE (op));
6846 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
6852 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
6853 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
6857 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
6864 /* Return 1 if it is appropriate to emit `ret' instructions in the
6865 body of a function. Do this only if the epilogue is simple, needing a
6866 couple of insns. Prior to reloading, we can't tell how many registers
6867 must be saved, so return 0 then. Return 0 if there is no frame
6868 marker to de-allocate. */
6871 ix86_can_use_return_insn_p (void)
6873 struct ix86_frame frame;
6875 if (! reload_completed || frame_pointer_needed)
6878 /* Don't allow more than 32 pop, since that's all we can do
6879 with one instruction. */
6880 if (crtl->args.pops_args
6881 && crtl->args.size >= 32768)
6884 ix86_compute_frame_layout (&frame);
6885 return frame.to_allocate == 0 && frame.nregs == 0;
6888 /* Value should be nonzero if functions must have frame pointers.
6889 Zero means the frame pointer need not be set up (and parms may
6890 be accessed via the stack pointer) in functions that seem suitable. */
6893 ix86_frame_pointer_required (void)
6895 /* If we accessed previous frames, then the generated code expects
6896 to be able to access the saved ebp value in our frame. */
6897 if (cfun->machine->accesses_prev_frame)
6900 /* Several x86 os'es need a frame pointer for other reasons,
6901 usually pertaining to setjmp. */
6902 if (SUBTARGET_FRAME_POINTER_REQUIRED)
6905 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
6906 the frame pointer by default. Turn it back on now if we've not
6907 got a leaf function. */
6908 if (TARGET_OMIT_LEAF_FRAME_POINTER
6909 && (!current_function_is_leaf
6910 || ix86_current_function_calls_tls_descriptor))
6919 /* Record that the current function accesses previous call frames. */
6922 ix86_setup_frame_addresses (void)
6924 cfun->machine->accesses_prev_frame = 1;
6927 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
6928 # define USE_HIDDEN_LINKONCE 1
6930 # define USE_HIDDEN_LINKONCE 0
6933 static int pic_labels_used;
6935 /* Fills in the label name that should be used for a pc thunk for
6936 the given register. */
6939 get_pc_thunk_name (char name[32], unsigned int regno)
6941 gcc_assert (!TARGET_64BIT);
6943 if (USE_HIDDEN_LINKONCE)
6944 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6946 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6950 /* This function generates code for -fpic that loads %ebx with
6951 the return address of the caller and then returns. */
6954 ix86_file_end (void)
6959 for (regno = 0; regno < 8; ++regno)
6963 if (! ((pic_labels_used >> regno) & 1))
6966 get_pc_thunk_name (name, regno);
6971 switch_to_section (darwin_sections[text_coal_section]);
6972 fputs ("\t.weak_definition\t", asm_out_file);
6973 assemble_name (asm_out_file, name);
6974 fputs ("\n\t.private_extern\t", asm_out_file);
6975 assemble_name (asm_out_file, name);
6976 fputs ("\n", asm_out_file);
6977 ASM_OUTPUT_LABEL (asm_out_file, name);
6981 if (USE_HIDDEN_LINKONCE)
6985 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6987 TREE_PUBLIC (decl) = 1;
6988 TREE_STATIC (decl) = 1;
6989 DECL_ONE_ONLY (decl) = 1;
6991 (*targetm.asm_out.unique_section) (decl, 0);
6992 switch_to_section (get_named_section (decl, NULL, 0));
6994 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6995 fputs ("\t.hidden\t", asm_out_file);
6996 assemble_name (asm_out_file, name);
6997 fputc ('\n', asm_out_file);
6998 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7002 switch_to_section (text_section);
7003 ASM_OUTPUT_LABEL (asm_out_file, name);
7006 xops[0] = gen_rtx_REG (Pmode, regno);
7007 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7008 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7009 output_asm_insn ("ret", xops);
7012 if (NEED_INDICATE_EXEC_STACK)
7013 file_end_indicate_exec_stack ();
7016 /* Emit code for the SET_GOT patterns. */
7019 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7025 if (TARGET_VXWORKS_RTP && flag_pic)
7027 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7028 xops[2] = gen_rtx_MEM (Pmode,
7029 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7030 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7032 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7033 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7034 an unadorned address. */
7035 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7036 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7037 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7041 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7043 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7045 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7048 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7050 output_asm_insn ("call\t%a2", xops);
7053 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7054 is what will be referenced by the Mach-O PIC subsystem. */
7056 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7059 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7060 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7063 output_asm_insn ("pop%z0\t%0", xops);
7068 get_pc_thunk_name (name, REGNO (dest));
7069 pic_labels_used |= 1 << REGNO (dest);
7071 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7072 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7073 output_asm_insn ("call\t%X2", xops);
7074 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7075 is what will be referenced by the Mach-O PIC subsystem. */
7078 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7080 targetm.asm_out.internal_label (asm_out_file, "L",
7081 CODE_LABEL_NUMBER (label));
7088 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7089 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7091 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7096 /* Generate an "push" pattern for input ARG. */
7101 return gen_rtx_SET (VOIDmode,
7103 gen_rtx_PRE_DEC (Pmode,
7104 stack_pointer_rtx)),
7108 /* Return >= 0 if there is an unused call-clobbered register available
7109 for the entire function. */
7112 ix86_select_alt_pic_regnum (void)
7114 if (current_function_is_leaf && !crtl->profile
7115 && !ix86_current_function_calls_tls_descriptor)
7118 for (i = 2; i >= 0; --i)
7119 if (!df_regs_ever_live_p (i))
7123 return INVALID_REGNUM;
7126 /* Return 1 if we need to save REGNO. */
7128 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7130 if (pic_offset_table_rtx
7131 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7132 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7134 || crtl->calls_eh_return
7135 || crtl->uses_const_pool))
7137 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7142 if (crtl->calls_eh_return && maybe_eh_return)
7147 unsigned test = EH_RETURN_DATA_REGNO (i);
7148 if (test == INVALID_REGNUM)
7155 if (cfun->machine->force_align_arg_pointer
7156 && regno == REGNO (cfun->machine->force_align_arg_pointer))
7159 return (df_regs_ever_live_p (regno)
7160 && !call_used_regs[regno]
7161 && !fixed_regs[regno]
7162 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7165 /* Return number of registers to be saved on the stack. */
7168 ix86_nsaved_regs (void)
7173 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7174 if (ix86_save_reg (regno, true))
7179 /* Return the offset between two registers, one to be eliminated, and the other
7180 its replacement, at the start of a routine. */
7183 ix86_initial_elimination_offset (int from, int to)
7185 struct ix86_frame frame;
7186 ix86_compute_frame_layout (&frame);
7188 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7189 return frame.hard_frame_pointer_offset;
7190 else if (from == FRAME_POINTER_REGNUM
7191 && to == HARD_FRAME_POINTER_REGNUM)
7192 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7195 gcc_assert (to == STACK_POINTER_REGNUM);
7197 if (from == ARG_POINTER_REGNUM)
7198 return frame.stack_pointer_offset;
7200 gcc_assert (from == FRAME_POINTER_REGNUM);
7201 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7205 /* Fill structure ix86_frame about frame of currently computed function. */
7208 ix86_compute_frame_layout (struct ix86_frame *frame)
7210 HOST_WIDE_INT total_size;
7211 unsigned int stack_alignment_needed;
7212 HOST_WIDE_INT offset;
7213 unsigned int preferred_alignment;
7214 HOST_WIDE_INT size = get_frame_size ();
7216 frame->nregs = ix86_nsaved_regs ();
7219 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7220 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7222 /* During reload iteration the amount of registers saved can change.
7223 Recompute the value as needed. Do not recompute when amount of registers
7224 didn't change as reload does multiple calls to the function and does not
7225 expect the decision to change within single iteration. */
7227 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7229 int count = frame->nregs;
7231 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7232 /* The fast prologue uses move instead of push to save registers. This
7233 is significantly longer, but also executes faster as modern hardware
7234 can execute the moves in parallel, but can't do that for push/pop.
7236 Be careful about choosing what prologue to emit: When function takes
7237 many instructions to execute we may use slow version as well as in
7238 case function is known to be outside hot spot (this is known with
7239 feedback only). Weight the size of function by number of registers
7240 to save as it is cheap to use one or two push instructions but very
7241 slow to use many of them. */
7243 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7244 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7245 || (flag_branch_probabilities
7246 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7247 cfun->machine->use_fast_prologue_epilogue = false;
7249 cfun->machine->use_fast_prologue_epilogue
7250 = !expensive_function_p (count);
7252 if (TARGET_PROLOGUE_USING_MOVE
7253 && cfun->machine->use_fast_prologue_epilogue)
7254 frame->save_regs_using_mov = true;
7256 frame->save_regs_using_mov = false;
7259 /* Skip return address and saved base pointer. */
7260 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7262 frame->hard_frame_pointer_offset = offset;
7264 /* Do some sanity checking of stack_alignment_needed and
7265 preferred_alignment, since i386 port is the only using those features
7266 that may break easily. */
7268 gcc_assert (!size || stack_alignment_needed);
7269 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7270 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
7271 gcc_assert (stack_alignment_needed
7272 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
7274 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
7275 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
7277 /* Register save area */
7278 offset += frame->nregs * UNITS_PER_WORD;
7281 if (ix86_save_varrargs_registers)
7283 offset += X86_64_VARARGS_SIZE;
7284 frame->va_arg_size = X86_64_VARARGS_SIZE;
7287 frame->va_arg_size = 0;
7289 /* Align start of frame for local function. */
7290 frame->padding1 = ((offset + stack_alignment_needed - 1)
7291 & -stack_alignment_needed) - offset;
7293 offset += frame->padding1;
7295 /* Frame pointer points here. */
7296 frame->frame_pointer_offset = offset;
7300 /* Add outgoing arguments area. Can be skipped if we eliminated
7301 all the function calls as dead code.
7302 Skipping is however impossible when function calls alloca. Alloca
7303 expander assumes that last crtl->outgoing_args_size
7304 of stack frame are unused. */
7305 if (ACCUMULATE_OUTGOING_ARGS
7306 && (!current_function_is_leaf || cfun->calls_alloca
7307 || ix86_current_function_calls_tls_descriptor))
7309 offset += crtl->outgoing_args_size;
7310 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7313 frame->outgoing_arguments_size = 0;
7315 /* Align stack boundary. Only needed if we're calling another function
7317 if (!current_function_is_leaf || cfun->calls_alloca
7318 || ix86_current_function_calls_tls_descriptor)
7319 frame->padding2 = ((offset + preferred_alignment - 1)
7320 & -preferred_alignment) - offset;
7322 frame->padding2 = 0;
7324 offset += frame->padding2;
7326 /* We've reached end of stack frame. */
7327 frame->stack_pointer_offset = offset;
7329 /* Size prologue needs to allocate. */
7330 frame->to_allocate =
7331 (size + frame->padding1 + frame->padding2
7332 + frame->outgoing_arguments_size + frame->va_arg_size);
7334 if ((!frame->to_allocate && frame->nregs <= 1)
7335 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7336 frame->save_regs_using_mov = false;
7338 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7339 && current_function_is_leaf
7340 && !ix86_current_function_calls_tls_descriptor)
7342 frame->red_zone_size = frame->to_allocate;
7343 if (frame->save_regs_using_mov)
7344 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7345 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7346 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7349 frame->red_zone_size = 0;
7350 frame->to_allocate -= frame->red_zone_size;
7351 frame->stack_pointer_offset -= frame->red_zone_size;
7353 fprintf (stderr, "\n");
7354 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7355 fprintf (stderr, "size: %ld\n", (long)size);
7356 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7357 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7358 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7359 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7360 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7361 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7362 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7363 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7364 (long)frame->hard_frame_pointer_offset);
7365 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7366 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7367 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7368 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7372 /* Emit code to save registers in the prologue. */
7375 ix86_emit_save_regs (void)
7380 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7381 if (ix86_save_reg (regno, true))
7383 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7384 RTX_FRAME_RELATED_P (insn) = 1;
7388 /* Emit code to save registers using MOV insns. First register
7389 is restored from POINTER + OFFSET. */
7391 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7396 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7397 if (ix86_save_reg (regno, true))
7399 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7401 gen_rtx_REG (Pmode, regno));
7402 RTX_FRAME_RELATED_P (insn) = 1;
7403 offset += UNITS_PER_WORD;
7407 /* Expand prologue or epilogue stack adjustment.
7408 The pattern exist to put a dependency on all ebp-based memory accesses.
7409 STYLE should be negative if instructions should be marked as frame related,
7410 zero if %r11 register is live and cannot be freely used and positive
7414 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7419 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7420 else if (x86_64_immediate_operand (offset, DImode))
7421 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7425 /* r11 is used by indirect sibcall return as well, set before the
7426 epilogue and used after the epilogue. ATM indirect sibcall
7427 shouldn't be used together with huge frame sizes in one
7428 function because of the frame_size check in sibcall.c. */
7430 r11 = gen_rtx_REG (DImode, R11_REG);
7431 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7433 RTX_FRAME_RELATED_P (insn) = 1;
7434 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7438 RTX_FRAME_RELATED_P (insn) = 1;
7441 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7444 ix86_internal_arg_pointer (void)
7446 bool has_force_align_arg_pointer =
7447 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
7448 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
7449 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
7450 && DECL_NAME (current_function_decl)
7451 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7452 && DECL_FILE_SCOPE_P (current_function_decl))
7453 || ix86_force_align_arg_pointer
7454 || has_force_align_arg_pointer)
7456 /* Nested functions can't realign the stack due to a register
7458 if (DECL_CONTEXT (current_function_decl)
7459 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
7461 if (ix86_force_align_arg_pointer)
7462 warning (0, "-mstackrealign ignored for nested functions");
7463 if (has_force_align_arg_pointer)
7464 error ("%s not supported for nested functions",
7465 ix86_force_align_arg_pointer_string);
7466 return virtual_incoming_args_rtx;
7468 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
7469 return copy_to_reg (cfun->machine->force_align_arg_pointer);
7472 return virtual_incoming_args_rtx;
7475 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7476 This is called from dwarf2out.c to emit call frame instructions
7477 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7479 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7481 rtx unspec = SET_SRC (pattern);
7482 gcc_assert (GET_CODE (unspec) == UNSPEC);
7486 case UNSPEC_REG_SAVE:
7487 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7488 SET_DEST (pattern));
7490 case UNSPEC_DEF_CFA:
7491 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7492 INTVAL (XVECEXP (unspec, 0, 0)));
7499 /* Expand the prologue into a bunch of separate insns. */
7502 ix86_expand_prologue (void)
7506 struct ix86_frame frame;
7507 HOST_WIDE_INT allocate;
7509 ix86_compute_frame_layout (&frame);
7511 if (cfun->machine->force_align_arg_pointer)
7515 /* Grab the argument pointer. */
7516 x = plus_constant (stack_pointer_rtx, 4);
7517 y = cfun->machine->force_align_arg_pointer;
7518 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7519 RTX_FRAME_RELATED_P (insn) = 1;
7521 /* The unwind info consists of two parts: install the fafp as the cfa,
7522 and record the fafp as the "save register" of the stack pointer.
7523 The later is there in order that the unwinder can see where it
7524 should restore the stack pointer across the and insn. */
7525 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
7526 x = gen_rtx_SET (VOIDmode, y, x);
7527 RTX_FRAME_RELATED_P (x) = 1;
7528 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
7530 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
7531 RTX_FRAME_RELATED_P (y) = 1;
7532 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
7533 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
7534 REG_NOTES (insn) = x;
7536 /* Align the stack. */
7537 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
7540 /* And here we cheat like madmen with the unwind info. We force the
7541 cfa register back to sp+4, which is exactly what it was at the
7542 start of the function. Re-pushing the return address results in
7543 the return at the same spot relative to the cfa, and thus is
7544 correct wrt the unwind info. */
7545 x = cfun->machine->force_align_arg_pointer;
7546 x = gen_frame_mem (Pmode, plus_constant (x, -4));
7547 insn = emit_insn (gen_push (x));
7548 RTX_FRAME_RELATED_P (insn) = 1;
7551 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
7552 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
7553 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
7554 REG_NOTES (insn) = x;
7557 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7558 slower on all targets. Also sdb doesn't like it. */
7560 if (frame_pointer_needed)
7562 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7563 RTX_FRAME_RELATED_P (insn) = 1;
7565 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7566 RTX_FRAME_RELATED_P (insn) = 1;
7569 allocate = frame.to_allocate;
7571 if (!frame.save_regs_using_mov)
7572 ix86_emit_save_regs ();
7574 allocate += frame.nregs * UNITS_PER_WORD;
7576 /* When using red zone we may start register saving before allocating
7577 the stack frame saving one cycle of the prologue. However I will
7578 avoid doing this if I am going to have to probe the stack since
7579 at least on x86_64 the stack probe can turn into a call that clobbers
7580 a red zone location */
7581 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7582 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7583 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
7584 : stack_pointer_rtx,
7585 -frame.nregs * UNITS_PER_WORD);
7589 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7590 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7591 GEN_INT (-allocate), -1);
7594 /* Only valid for Win32. */
7595 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7599 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7601 if (cfun->machine->call_abi == MS_ABI)
7604 eax_live = ix86_eax_live_at_start_p ();
7608 emit_insn (gen_push (eax));
7609 allocate -= UNITS_PER_WORD;
7612 emit_move_insn (eax, GEN_INT (allocate));
7615 insn = gen_allocate_stack_worker_64 (eax);
7617 insn = gen_allocate_stack_worker_32 (eax);
7618 insn = emit_insn (insn);
7619 RTX_FRAME_RELATED_P (insn) = 1;
7620 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7621 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7622 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7623 t, REG_NOTES (insn));
7627 if (frame_pointer_needed)
7628 t = plus_constant (hard_frame_pointer_rtx,
7631 - frame.nregs * UNITS_PER_WORD);
7633 t = plus_constant (stack_pointer_rtx, allocate);
7634 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
7638 if (frame.save_regs_using_mov
7639 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7640 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
7642 if (!frame_pointer_needed || !frame.to_allocate)
7643 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
7645 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
7646 -frame.nregs * UNITS_PER_WORD);
7649 pic_reg_used = false;
7650 if (pic_offset_table_rtx
7651 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7654 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
7656 if (alt_pic_reg_used != INVALID_REGNUM)
7657 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
7659 pic_reg_used = true;
7666 if (ix86_cmodel == CM_LARGE_PIC)
7668 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
7669 rtx label = gen_label_rtx ();
7671 LABEL_PRESERVE_P (label) = 1;
7672 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
7673 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
7674 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
7675 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
7676 pic_offset_table_rtx, tmp_reg));
7679 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
7682 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
7685 /* Prevent function calls from being scheduled before the call to mcount.
7686 In the pic_reg_used case, make sure that the got load isn't deleted. */
7690 emit_insn (gen_prologue_use (pic_offset_table_rtx));
7691 emit_insn (gen_blockage ());
7694 /* Emit cld instruction if stringops are used in the function. */
7695 if (TARGET_CLD && ix86_current_function_needs_cld)
7696 emit_insn (gen_cld ());
7699 /* Emit code to restore saved registers using MOV insns. First register
7700 is restored from POINTER + OFFSET. */
7702 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
7703 int maybe_eh_return)
7706 rtx base_address = gen_rtx_MEM (Pmode, pointer);
7708 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7709 if (ix86_save_reg (regno, maybe_eh_return))
7711 /* Ensure that adjust_address won't be forced to produce pointer
7712 out of range allowed by x86-64 instruction set. */
7713 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
7717 r11 = gen_rtx_REG (DImode, R11_REG);
7718 emit_move_insn (r11, GEN_INT (offset));
7719 emit_insn (gen_adddi3 (r11, r11, pointer));
7720 base_address = gen_rtx_MEM (Pmode, r11);
7723 emit_move_insn (gen_rtx_REG (Pmode, regno),
7724 adjust_address (base_address, Pmode, offset));
7725 offset += UNITS_PER_WORD;
7729 /* Restore function stack, frame, and registers. */
7732 ix86_expand_epilogue (int style)
7735 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
7736 struct ix86_frame frame;
7737 HOST_WIDE_INT offset;
7739 ix86_compute_frame_layout (&frame);
7741 /* Calculate start of saved registers relative to ebp. Special care
7742 must be taken for the normal return case of a function using
7743 eh_return: the eax and edx registers are marked as saved, but not
7744 restored along this path. */
7745 offset = frame.nregs;
7746 if (crtl->calls_eh_return && style != 2)
7748 offset *= -UNITS_PER_WORD;
7750 /* If we're only restoring one register and sp is not valid then
7751 using a move instruction to restore the register since it's
7752 less work than reloading sp and popping the register.
7754 The default code result in stack adjustment using add/lea instruction,
7755 while this code results in LEAVE instruction (or discrete equivalent),
7756 so it is profitable in some other cases as well. Especially when there
7757 are no registers to restore. We also use this code when TARGET_USE_LEAVE
7758 and there is exactly one register to pop. This heuristic may need some
7759 tuning in future. */
7760 if ((!sp_valid && frame.nregs <= 1)
7761 || (TARGET_EPILOGUE_USING_MOVE
7762 && cfun->machine->use_fast_prologue_epilogue
7763 && (frame.nregs > 1 || frame.to_allocate))
7764 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
7765 || (frame_pointer_needed && TARGET_USE_LEAVE
7766 && cfun->machine->use_fast_prologue_epilogue
7767 && frame.nregs == 1)
7768 || crtl->calls_eh_return)
7770 /* Restore registers. We can use ebp or esp to address the memory
7771 locations. If both are available, default to ebp, since offsets
7772 are known to be small. Only exception is esp pointing directly to the
7773 end of block of saved registers, where we may simplify addressing
7776 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
7777 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
7778 frame.to_allocate, style == 2);
7780 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
7781 offset, style == 2);
7783 /* eh_return epilogues need %ecx added to the stack pointer. */
7786 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
7788 if (frame_pointer_needed)
7790 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
7791 tmp = plus_constant (tmp, UNITS_PER_WORD);
7792 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
7794 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
7795 emit_move_insn (hard_frame_pointer_rtx, tmp);
7797 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
7802 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
7803 tmp = plus_constant (tmp, (frame.to_allocate
7804 + frame.nregs * UNITS_PER_WORD));
7805 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
7808 else if (!frame_pointer_needed)
7809 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7810 GEN_INT (frame.to_allocate
7811 + frame.nregs * UNITS_PER_WORD),
7813 /* If not an i386, mov & pop is faster than "leave". */
7814 else if (TARGET_USE_LEAVE || optimize_size
7815 || !cfun->machine->use_fast_prologue_epilogue)
7816 emit_insn ((*ix86_gen_leave) ());
7819 pro_epilogue_adjust_stack (stack_pointer_rtx,
7820 hard_frame_pointer_rtx,
7823 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
7828 /* First step is to deallocate the stack frame so that we can
7829 pop the registers. */
7832 gcc_assert (frame_pointer_needed);
7833 pro_epilogue_adjust_stack (stack_pointer_rtx,
7834 hard_frame_pointer_rtx,
7835 GEN_INT (offset), style);
7837 else if (frame.to_allocate)
7838 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7839 GEN_INT (frame.to_allocate), style);
7841 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7842 if (ix86_save_reg (regno, false))
7843 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
7844 if (frame_pointer_needed)
7846 /* Leave results in shorter dependency chains on CPUs that are
7847 able to grok it fast. */
7848 if (TARGET_USE_LEAVE)
7849 emit_insn ((*ix86_gen_leave) ());
7851 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
7855 if (cfun->machine->force_align_arg_pointer)
7857 emit_insn (gen_addsi3 (stack_pointer_rtx,
7858 cfun->machine->force_align_arg_pointer,
7862 /* Sibcall epilogues don't want a return instruction. */
7866 if (crtl->args.pops_args && crtl->args.size)
7868 rtx popc = GEN_INT (crtl->args.pops_args);
7870 /* i386 can only pop 64K bytes. If asked to pop more, pop
7871 return address, do explicit add, and jump indirectly to the
7874 if (crtl->args.pops_args >= 65536)
7876 rtx ecx = gen_rtx_REG (SImode, CX_REG);
7878 /* There is no "pascal" calling convention in any 64bit ABI. */
7879 gcc_assert (!TARGET_64BIT);
7881 emit_insn (gen_popsi1 (ecx));
7882 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
7883 emit_jump_insn (gen_return_indirect_internal (ecx));
7886 emit_jump_insn (gen_return_pop_internal (popc));
7889 emit_jump_insn (gen_return_internal ());
7892 /* Reset from the function's potential modifications. */
7895 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7896 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
7898 if (pic_offset_table_rtx)
7899 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
7901 /* Mach-O doesn't support labels at the end of objects, so if
7902 it looks like we might want one, insert a NOP. */
7904 rtx insn = get_last_insn ();
7907 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
7908 insn = PREV_INSN (insn);
7912 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
7913 fputs ("\tnop\n", file);
7919 /* Extract the parts of an RTL expression that is a valid memory address
7920 for an instruction. Return 0 if the structure of the address is
7921 grossly off. Return -1 if the address contains ASHIFT, so it is not
7922 strictly valid, but still used for computing length of lea instruction. */
7925 ix86_decompose_address (rtx addr, struct ix86_address *out)
7927 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
7928 rtx base_reg, index_reg;
7929 HOST_WIDE_INT scale = 1;
7930 rtx scale_rtx = NULL_RTX;
7932 enum ix86_address_seg seg = SEG_DEFAULT;
7934 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
7936 else if (GET_CODE (addr) == PLUS)
7946 addends[n++] = XEXP (op, 1);
7949 while (GET_CODE (op) == PLUS);
7954 for (i = n; i >= 0; --i)
7957 switch (GET_CODE (op))
7962 index = XEXP (op, 0);
7963 scale_rtx = XEXP (op, 1);
7967 if (XINT (op, 1) == UNSPEC_TP
7968 && TARGET_TLS_DIRECT_SEG_REFS
7969 && seg == SEG_DEFAULT)
7970 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7999 else if (GET_CODE (addr) == MULT)
8001 index = XEXP (addr, 0); /* index*scale */
8002 scale_rtx = XEXP (addr, 1);
8004 else if (GET_CODE (addr) == ASHIFT)
8008 /* We're called for lea too, which implements ashift on occasion. */
8009 index = XEXP (addr, 0);
8010 tmp = XEXP (addr, 1);
8011 if (!CONST_INT_P (tmp))
8013 scale = INTVAL (tmp);
8014 if ((unsigned HOST_WIDE_INT) scale > 3)
8020 disp = addr; /* displacement */
8022 /* Extract the integral value of scale. */
8025 if (!CONST_INT_P (scale_rtx))
8027 scale = INTVAL (scale_rtx);
8030 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8031 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8033 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8034 if (base_reg && index_reg && scale == 1
8035 && (index_reg == arg_pointer_rtx
8036 || index_reg == frame_pointer_rtx
8037 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8040 tmp = base, base = index, index = tmp;
8041 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8044 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8045 if ((base_reg == hard_frame_pointer_rtx
8046 || base_reg == frame_pointer_rtx
8047 || base_reg == arg_pointer_rtx) && !disp)
8050 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8051 Avoid this by transforming to [%esi+0]. */
8052 if (TARGET_K6 && !optimize_size
8053 && base_reg && !index_reg && !disp
8055 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8058 /* Special case: encode reg+reg instead of reg*2. */
8059 if (!base && index && scale && scale == 2)
8060 base = index, base_reg = index_reg, scale = 1;
8062 /* Special case: scaling cannot be encoded without base or displacement. */
8063 if (!base && !disp && index && scale != 1)
8075 /* Return cost of the memory address x.
8076 For i386, it is better to use a complex address than let gcc copy
8077 the address into a reg and make a new pseudo. But not if the address
8078 requires to two regs - that would mean more pseudos with longer
8081 ix86_address_cost (rtx x)
8083 struct ix86_address parts;
8085 int ok = ix86_decompose_address (x, &parts);
8089 if (parts.base && GET_CODE (parts.base) == SUBREG)
8090 parts.base = SUBREG_REG (parts.base);
8091 if (parts.index && GET_CODE (parts.index) == SUBREG)
8092 parts.index = SUBREG_REG (parts.index);
8094 /* Attempt to minimize number of registers in the address. */
8096 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8098 && (!REG_P (parts.index)
8099 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8103 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8105 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8106 && parts.base != parts.index)
8109 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8110 since it's predecode logic can't detect the length of instructions
8111 and it degenerates to vector decoded. Increase cost of such
8112 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8113 to split such addresses or even refuse such addresses at all.
8115 Following addressing modes are affected:
8120 The first and last case may be avoidable by explicitly coding the zero in
8121 memory address, but I don't have AMD-K6 machine handy to check this
8125 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8126 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8127 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8133 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8134 this is used for to form addresses to local data when -fPIC is in
8138 darwin_local_data_pic (rtx disp)
8140 if (GET_CODE (disp) == MINUS)
8142 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8143 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
8144 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
8146 const char *sym_name = XSTR (XEXP (disp, 1), 0);
8147 if (! strcmp (sym_name, "<pic base>"))
8155 /* Determine if a given RTX is a valid constant. We already know this
8156 satisfies CONSTANT_P. */
8159 legitimate_constant_p (rtx x)
8161 switch (GET_CODE (x))
8166 if (GET_CODE (x) == PLUS)
8168 if (!CONST_INT_P (XEXP (x, 1)))
8173 if (TARGET_MACHO && darwin_local_data_pic (x))
8176 /* Only some unspecs are valid as "constants". */
8177 if (GET_CODE (x) == UNSPEC)
8178 switch (XINT (x, 1))
8183 return TARGET_64BIT;
8186 x = XVECEXP (x, 0, 0);
8187 return (GET_CODE (x) == SYMBOL_REF
8188 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8190 x = XVECEXP (x, 0, 0);
8191 return (GET_CODE (x) == SYMBOL_REF
8192 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8197 /* We must have drilled down to a symbol. */
8198 if (GET_CODE (x) == LABEL_REF)
8200 if (GET_CODE (x) != SYMBOL_REF)
8205 /* TLS symbols are never valid. */
8206 if (SYMBOL_REF_TLS_MODEL (x))
8209 /* DLLIMPORT symbols are never valid. */
8210 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8211 && SYMBOL_REF_DLLIMPORT_P (x))
8216 if (GET_MODE (x) == TImode
8217 && x != CONST0_RTX (TImode)
8223 if (x == CONST0_RTX (GET_MODE (x)))
8231 /* Otherwise we handle everything else in the move patterns. */
8235 /* Determine if it's legal to put X into the constant pool. This
8236 is not possible for the address of thread-local symbols, which
8237 is checked above. */
8240 ix86_cannot_force_const_mem (rtx x)
8242 /* We can always put integral constants and vectors in memory. */
8243 switch (GET_CODE (x))
8253 return !legitimate_constant_p (x);
8256 /* Determine if a given RTX is a valid constant address. */
8259 constant_address_p (rtx x)
8261 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8264 /* Nonzero if the constant value X is a legitimate general operand
8265 when generating PIC code. It is given that flag_pic is on and
8266 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8269 legitimate_pic_operand_p (rtx x)
8273 switch (GET_CODE (x))
8276 inner = XEXP (x, 0);
8277 if (GET_CODE (inner) == PLUS
8278 && CONST_INT_P (XEXP (inner, 1)))
8279 inner = XEXP (inner, 0);
8281 /* Only some unspecs are valid as "constants". */
8282 if (GET_CODE (inner) == UNSPEC)
8283 switch (XINT (inner, 1))
8288 return TARGET_64BIT;
8290 x = XVECEXP (inner, 0, 0);
8291 return (GET_CODE (x) == SYMBOL_REF
8292 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8300 return legitimate_pic_address_disp_p (x);
8307 /* Determine if a given CONST RTX is a valid memory displacement
8311 legitimate_pic_address_disp_p (rtx disp)
8315 /* In 64bit mode we can allow direct addresses of symbols and labels
8316 when they are not dynamic symbols. */
8319 rtx op0 = disp, op1;
8321 switch (GET_CODE (disp))
8327 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8329 op0 = XEXP (XEXP (disp, 0), 0);
8330 op1 = XEXP (XEXP (disp, 0), 1);
8331 if (!CONST_INT_P (op1)
8332 || INTVAL (op1) >= 16*1024*1024
8333 || INTVAL (op1) < -16*1024*1024)
8335 if (GET_CODE (op0) == LABEL_REF)
8337 if (GET_CODE (op0) != SYMBOL_REF)
8342 /* TLS references should always be enclosed in UNSPEC. */
8343 if (SYMBOL_REF_TLS_MODEL (op0))
8345 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8346 && ix86_cmodel != CM_LARGE_PIC)
8354 if (GET_CODE (disp) != CONST)
8356 disp = XEXP (disp, 0);
8360 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8361 of GOT tables. We should not need these anyway. */
8362 if (GET_CODE (disp) != UNSPEC
8363 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8364 && XINT (disp, 1) != UNSPEC_GOTOFF
8365 && XINT (disp, 1) != UNSPEC_PLTOFF))
8368 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8369 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8375 if (GET_CODE (disp) == PLUS)
8377 if (!CONST_INT_P (XEXP (disp, 1)))
8379 disp = XEXP (disp, 0);
8383 if (TARGET_MACHO && darwin_local_data_pic (disp))
8386 if (GET_CODE (disp) != UNSPEC)
8389 switch (XINT (disp, 1))
8394 /* We need to check for both symbols and labels because VxWorks loads
8395 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8397 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8398 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8400 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8401 While ABI specify also 32bit relocation but we don't produce it in
8402 small PIC model at all. */
8403 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8404 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8406 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8408 case UNSPEC_GOTTPOFF:
8409 case UNSPEC_GOTNTPOFF:
8410 case UNSPEC_INDNTPOFF:
8413 disp = XVECEXP (disp, 0, 0);
8414 return (GET_CODE (disp) == SYMBOL_REF
8415 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8417 disp = XVECEXP (disp, 0, 0);
8418 return (GET_CODE (disp) == SYMBOL_REF
8419 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8421 disp = XVECEXP (disp, 0, 0);
8422 return (GET_CODE (disp) == SYMBOL_REF
8423 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8429 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8430 memory address for an instruction. The MODE argument is the machine mode
8431 for the MEM expression that wants to use this address.
8433 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8434 convert common non-canonical forms to canonical form so that they will
8438 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8439 rtx addr, int strict)
8441 struct ix86_address parts;
8442 rtx base, index, disp;
8443 HOST_WIDE_INT scale;
8444 const char *reason = NULL;
8445 rtx reason_rtx = NULL_RTX;
8447 if (ix86_decompose_address (addr, &parts) <= 0)
8449 reason = "decomposition failed";
8454 index = parts.index;
8456 scale = parts.scale;
8458 /* Validate base register.
8460 Don't allow SUBREG's that span more than a word here. It can lead to spill
8461 failures when the base is one word out of a two word structure, which is
8462 represented internally as a DImode int. */
8471 else if (GET_CODE (base) == SUBREG
8472 && REG_P (SUBREG_REG (base))
8473 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8475 reg = SUBREG_REG (base);
8478 reason = "base is not a register";
8482 if (GET_MODE (base) != Pmode)
8484 reason = "base is not in Pmode";
8488 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8489 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8491 reason = "base is not valid";
8496 /* Validate index register.
8498 Don't allow SUBREG's that span more than a word here -- same as above. */
8507 else if (GET_CODE (index) == SUBREG
8508 && REG_P (SUBREG_REG (index))
8509 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8511 reg = SUBREG_REG (index);
8514 reason = "index is not a register";
8518 if (GET_MODE (index) != Pmode)
8520 reason = "index is not in Pmode";
8524 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8525 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8527 reason = "index is not valid";
8532 /* Validate scale factor. */
8535 reason_rtx = GEN_INT (scale);
8538 reason = "scale without index";
8542 if (scale != 2 && scale != 4 && scale != 8)
8544 reason = "scale is not a valid multiplier";
8549 /* Validate displacement. */
8554 if (GET_CODE (disp) == CONST
8555 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8556 switch (XINT (XEXP (disp, 0), 1))
8558 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
8559 used. While ABI specify also 32bit relocations, we don't produce
8560 them at all and use IP relative instead. */
8563 gcc_assert (flag_pic);
8565 goto is_legitimate_pic;
8566 reason = "64bit address unspec";
8569 case UNSPEC_GOTPCREL:
8570 gcc_assert (flag_pic);
8571 goto is_legitimate_pic;
8573 case UNSPEC_GOTTPOFF:
8574 case UNSPEC_GOTNTPOFF:
8575 case UNSPEC_INDNTPOFF:
8581 reason = "invalid address unspec";
8585 else if (SYMBOLIC_CONST (disp)
8589 && MACHOPIC_INDIRECT
8590 && !machopic_operand_p (disp)
8596 if (TARGET_64BIT && (index || base))
8598 /* foo@dtpoff(%rX) is ok. */
8599 if (GET_CODE (disp) != CONST
8600 || GET_CODE (XEXP (disp, 0)) != PLUS
8601 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
8602 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
8603 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
8604 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
8606 reason = "non-constant pic memory reference";
8610 else if (! legitimate_pic_address_disp_p (disp))
8612 reason = "displacement is an invalid pic construct";
8616 /* This code used to verify that a symbolic pic displacement
8617 includes the pic_offset_table_rtx register.
8619 While this is good idea, unfortunately these constructs may
8620 be created by "adds using lea" optimization for incorrect
8629 This code is nonsensical, but results in addressing
8630 GOT table with pic_offset_table_rtx base. We can't
8631 just refuse it easily, since it gets matched by
8632 "addsi3" pattern, that later gets split to lea in the
8633 case output register differs from input. While this
8634 can be handled by separate addsi pattern for this case
8635 that never results in lea, this seems to be easier and
8636 correct fix for crash to disable this test. */
8638 else if (GET_CODE (disp) != LABEL_REF
8639 && !CONST_INT_P (disp)
8640 && (GET_CODE (disp) != CONST
8641 || !legitimate_constant_p (disp))
8642 && (GET_CODE (disp) != SYMBOL_REF
8643 || !legitimate_constant_p (disp)))
8645 reason = "displacement is not constant";
8648 else if (TARGET_64BIT
8649 && !x86_64_immediate_operand (disp, VOIDmode))
8651 reason = "displacement is out of range";
8656 /* Everything looks valid. */
8663 /* Return a unique alias set for the GOT. */
8665 static alias_set_type
8666 ix86_GOT_alias_set (void)
8668 static alias_set_type set = -1;
8670 set = new_alias_set ();
8674 /* Return a legitimate reference for ORIG (an address) using the
8675 register REG. If REG is 0, a new pseudo is generated.
8677 There are two types of references that must be handled:
8679 1. Global data references must load the address from the GOT, via
8680 the PIC reg. An insn is emitted to do this load, and the reg is
8683 2. Static data references, constant pool addresses, and code labels
8684 compute the address as an offset from the GOT, whose base is in
8685 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
8686 differentiate them from global data objects. The returned
8687 address is the PIC reg + an unspec constant.
8689 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
8690 reg also appears in the address. */
8693 legitimize_pic_address (rtx orig, rtx reg)
8700 if (TARGET_MACHO && !TARGET_64BIT)
8703 reg = gen_reg_rtx (Pmode);
8704 /* Use the generic Mach-O PIC machinery. */
8705 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
8709 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
8711 else if (TARGET_64BIT
8712 && ix86_cmodel != CM_SMALL_PIC
8713 && gotoff_operand (addr, Pmode))
8716 /* This symbol may be referenced via a displacement from the PIC
8717 base address (@GOTOFF). */
8719 if (reload_in_progress)
8720 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8721 if (GET_CODE (addr) == CONST)
8722 addr = XEXP (addr, 0);
8723 if (GET_CODE (addr) == PLUS)
8725 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8727 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8730 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8731 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8733 tmpreg = gen_reg_rtx (Pmode);
8736 emit_move_insn (tmpreg, new_rtx);
8740 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
8741 tmpreg, 1, OPTAB_DIRECT);
8744 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
8746 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
8748 /* This symbol may be referenced via a displacement from the PIC
8749 base address (@GOTOFF). */
8751 if (reload_in_progress)
8752 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8753 if (GET_CODE (addr) == CONST)
8754 addr = XEXP (addr, 0);
8755 if (GET_CODE (addr) == PLUS)
8757 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
8759 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
8762 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
8763 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8764 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8768 emit_move_insn (reg, new_rtx);
8772 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
8773 /* We can't use @GOTOFF for text labels on VxWorks;
8774 see gotoff_operand. */
8775 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
8777 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8779 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
8780 return legitimize_dllimport_symbol (addr, true);
8781 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
8782 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
8783 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
8785 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
8786 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
8790 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
8792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
8793 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8794 new_rtx = gen_const_mem (Pmode, new_rtx);
8795 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
8798 reg = gen_reg_rtx (Pmode);
8799 /* Use directly gen_movsi, otherwise the address is loaded
8800 into register for CSE. We don't want to CSE this addresses,
8801 instead we CSE addresses from the GOT table, so skip this. */
8802 emit_insn (gen_movsi (reg, new_rtx));
8807 /* This symbol must be referenced via a load from the
8808 Global Offset Table (@GOT). */
8810 if (reload_in_progress)
8811 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8812 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
8813 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8815 new_rtx = force_reg (Pmode, new_rtx);
8816 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8817 new_rtx = gen_const_mem (Pmode, new_rtx);
8818 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
8821 reg = gen_reg_rtx (Pmode);
8822 emit_move_insn (reg, new_rtx);
8828 if (CONST_INT_P (addr)
8829 && !x86_64_immediate_operand (addr, VOIDmode))
8833 emit_move_insn (reg, addr);
8837 new_rtx = force_reg (Pmode, addr);
8839 else if (GET_CODE (addr) == CONST)
8841 addr = XEXP (addr, 0);
8843 /* We must match stuff we generate before. Assume the only
8844 unspecs that can get here are ours. Not that we could do
8845 anything with them anyway.... */
8846 if (GET_CODE (addr) == UNSPEC
8847 || (GET_CODE (addr) == PLUS
8848 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
8850 gcc_assert (GET_CODE (addr) == PLUS);
8852 if (GET_CODE (addr) == PLUS)
8854 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
8856 /* Check first to see if this is a constant offset from a @GOTOFF
8857 symbol reference. */
8858 if (gotoff_operand (op0, Pmode)
8859 && CONST_INT_P (op1))
8863 if (reload_in_progress)
8864 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8865 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
8867 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
8868 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8869 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8873 emit_move_insn (reg, new_rtx);
8879 if (INTVAL (op1) < -16*1024*1024
8880 || INTVAL (op1) >= 16*1024*1024)
8882 if (!x86_64_immediate_operand (op1, Pmode))
8883 op1 = force_reg (Pmode, op1);
8884 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
8890 base = legitimize_pic_address (XEXP (addr, 0), reg);
8891 new_rtx = legitimize_pic_address (XEXP (addr, 1),
8892 base == reg ? NULL_RTX : reg);
8894 if (CONST_INT_P (new_rtx))
8895 new_rtx = plus_constant (base, INTVAL (new_rtx));
8898 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
8900 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
8901 new_rtx = XEXP (new_rtx, 1);
8903 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
8911 /* Load the thread pointer. If TO_REG is true, force it into a register. */
8914 get_thread_pointer (int to_reg)
8918 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
8922 reg = gen_reg_rtx (Pmode);
8923 insn = gen_rtx_SET (VOIDmode, reg, tp);
8924 insn = emit_insn (insn);
8929 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
8930 false if we expect this to be used for a memory address and true if
8931 we expect to load the address into a register. */
8934 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
8936 rtx dest, base, off, pic, tp;
8941 case TLS_MODEL_GLOBAL_DYNAMIC:
8942 dest = gen_reg_rtx (Pmode);
8943 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8945 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8947 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
8950 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8951 insns = get_insns ();
8954 RTL_CONST_CALL_P (insns) = 1;
8955 emit_libcall_block (insns, dest, rax, x);
8957 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8958 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8960 emit_insn (gen_tls_global_dynamic_32 (dest, x));
8962 if (TARGET_GNU2_TLS)
8964 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8966 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8970 case TLS_MODEL_LOCAL_DYNAMIC:
8971 base = gen_reg_rtx (Pmode);
8972 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8974 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8976 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8979 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8980 insns = get_insns ();
8983 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8984 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8985 RTL_CONST_CALL_P (insns) = 1;
8986 emit_libcall_block (insns, base, rax, note);
8988 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8989 emit_insn (gen_tls_local_dynamic_base_64 (base));
8991 emit_insn (gen_tls_local_dynamic_base_32 (base));
8993 if (TARGET_GNU2_TLS)
8995 rtx x = ix86_tls_module_base ();
8997 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8998 gen_rtx_MINUS (Pmode, x, tp));
9001 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9002 off = gen_rtx_CONST (Pmode, off);
9004 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9006 if (TARGET_GNU2_TLS)
9008 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9010 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9015 case TLS_MODEL_INITIAL_EXEC:
9019 type = UNSPEC_GOTNTPOFF;
9023 if (reload_in_progress)
9024 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9025 pic = pic_offset_table_rtx;
9026 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9028 else if (!TARGET_ANY_GNU_TLS)
9030 pic = gen_reg_rtx (Pmode);
9031 emit_insn (gen_set_got (pic));
9032 type = UNSPEC_GOTTPOFF;
9037 type = UNSPEC_INDNTPOFF;
9040 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9041 off = gen_rtx_CONST (Pmode, off);
9043 off = gen_rtx_PLUS (Pmode, pic, off);
9044 off = gen_const_mem (Pmode, off);
9045 set_mem_alias_set (off, ix86_GOT_alias_set ());
9047 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9049 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9050 off = force_reg (Pmode, off);
9051 return gen_rtx_PLUS (Pmode, base, off);
9055 base = get_thread_pointer (true);
9056 dest = gen_reg_rtx (Pmode);
9057 emit_insn (gen_subsi3 (dest, base, off));
9061 case TLS_MODEL_LOCAL_EXEC:
9062 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9063 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9064 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9065 off = gen_rtx_CONST (Pmode, off);
9067 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9069 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9070 return gen_rtx_PLUS (Pmode, base, off);
9074 base = get_thread_pointer (true);
9075 dest = gen_reg_rtx (Pmode);
9076 emit_insn (gen_subsi3 (dest, base, off));
9087 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9090 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9091 htab_t dllimport_map;
9094 get_dllimport_decl (tree decl)
9096 struct tree_map *h, in;
9100 size_t namelen, prefixlen;
9106 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9108 in.hash = htab_hash_pointer (decl);
9109 in.base.from = decl;
9110 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9111 h = (struct tree_map *) *loc;
9115 *loc = h = GGC_NEW (struct tree_map);
9117 h->base.from = decl;
9118 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9119 DECL_ARTIFICIAL (to) = 1;
9120 DECL_IGNORED_P (to) = 1;
9121 DECL_EXTERNAL (to) = 1;
9122 TREE_READONLY (to) = 1;
9124 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9125 name = targetm.strip_name_encoding (name);
9126 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
9127 namelen = strlen (name);
9128 prefixlen = strlen (prefix);
9129 imp_name = (char *) alloca (namelen + prefixlen + 1);
9130 memcpy (imp_name, prefix, prefixlen);
9131 memcpy (imp_name + prefixlen, name, namelen + 1);
9133 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9134 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9135 SET_SYMBOL_REF_DECL (rtl, to);
9136 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9138 rtl = gen_const_mem (Pmode, rtl);
9139 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9141 SET_DECL_RTL (to, rtl);
9142 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9147 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9148 true if we require the result be a register. */
9151 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9156 gcc_assert (SYMBOL_REF_DECL (symbol));
9157 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9159 x = DECL_RTL (imp_decl);
9161 x = force_reg (Pmode, x);
9165 /* Try machine-dependent ways of modifying an illegitimate address
9166 to be legitimate. If we find one, return the new, valid address.
9167 This macro is used in only one place: `memory_address' in explow.c.
9169 OLDX is the address as it was before break_out_memory_refs was called.
9170 In some cases it is useful to look at this to decide what needs to be done.
9172 MODE and WIN are passed so that this macro can use
9173 GO_IF_LEGITIMATE_ADDRESS.
9175 It is always safe for this macro to do nothing. It exists to recognize
9176 opportunities to optimize the output.
9178 For the 80386, we handle X+REG by loading X into a register R and
9179 using R+REG. R will go in a general reg and indexing will be used.
9180 However, if REG is a broken-out memory address or multiplication,
9181 nothing needs to be done because REG can certainly go in a general reg.
9183 When -fpic is used, special handling is needed for symbolic references.
9184 See comments by legitimize_pic_address in i386.c for details. */
9187 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
9192 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9194 return legitimize_tls_address (x, (enum tls_model) log, false);
9195 if (GET_CODE (x) == CONST
9196 && GET_CODE (XEXP (x, 0)) == PLUS
9197 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9198 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9200 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9201 (enum tls_model) log, false);
9202 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9205 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9207 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9208 return legitimize_dllimport_symbol (x, true);
9209 if (GET_CODE (x) == CONST
9210 && GET_CODE (XEXP (x, 0)) == PLUS
9211 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9212 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9214 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9215 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9219 if (flag_pic && SYMBOLIC_CONST (x))
9220 return legitimize_pic_address (x, 0);
9222 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9223 if (GET_CODE (x) == ASHIFT
9224 && CONST_INT_P (XEXP (x, 1))
9225 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9228 log = INTVAL (XEXP (x, 1));
9229 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9230 GEN_INT (1 << log));
9233 if (GET_CODE (x) == PLUS)
9235 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9237 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9238 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9239 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9242 log = INTVAL (XEXP (XEXP (x, 0), 1));
9243 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9244 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9245 GEN_INT (1 << log));
9248 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9249 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9250 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9253 log = INTVAL (XEXP (XEXP (x, 1), 1));
9254 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9255 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9256 GEN_INT (1 << log));
9259 /* Put multiply first if it isn't already. */
9260 if (GET_CODE (XEXP (x, 1)) == MULT)
9262 rtx tmp = XEXP (x, 0);
9263 XEXP (x, 0) = XEXP (x, 1);
9268 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9269 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9270 created by virtual register instantiation, register elimination, and
9271 similar optimizations. */
9272 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9275 x = gen_rtx_PLUS (Pmode,
9276 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9277 XEXP (XEXP (x, 1), 0)),
9278 XEXP (XEXP (x, 1), 1));
9282 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9283 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9284 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9286 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9287 && CONSTANT_P (XEXP (x, 1)))
9290 rtx other = NULL_RTX;
9292 if (CONST_INT_P (XEXP (x, 1)))
9294 constant = XEXP (x, 1);
9295 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9297 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9299 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9300 other = XEXP (x, 1);
9308 x = gen_rtx_PLUS (Pmode,
9309 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9310 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9311 plus_constant (other, INTVAL (constant)));
9315 if (changed && legitimate_address_p (mode, x, FALSE))
9318 if (GET_CODE (XEXP (x, 0)) == MULT)
9321 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9324 if (GET_CODE (XEXP (x, 1)) == MULT)
9327 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9331 && REG_P (XEXP (x, 1))
9332 && REG_P (XEXP (x, 0)))
9335 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9338 x = legitimize_pic_address (x, 0);
9341 if (changed && legitimate_address_p (mode, x, FALSE))
9344 if (REG_P (XEXP (x, 0)))
9346 rtx temp = gen_reg_rtx (Pmode);
9347 rtx val = force_operand (XEXP (x, 1), temp);
9349 emit_move_insn (temp, val);
9355 else if (REG_P (XEXP (x, 1)))
9357 rtx temp = gen_reg_rtx (Pmode);
9358 rtx val = force_operand (XEXP (x, 0), temp);
9360 emit_move_insn (temp, val);
9370 /* Print an integer constant expression in assembler syntax. Addition
9371 and subtraction are the only arithmetic that may appear in these
9372 expressions. FILE is the stdio stream to write to, X is the rtx, and
9373 CODE is the operand print code from the output string. */
9376 output_pic_addr_const (FILE *file, rtx x, int code)
9380 switch (GET_CODE (x))
9383 gcc_assert (flag_pic);
9388 if (! TARGET_MACHO || TARGET_64BIT)
9389 output_addr_const (file, x);
9392 const char *name = XSTR (x, 0);
9394 /* Mark the decl as referenced so that cgraph will
9395 output the function. */
9396 if (SYMBOL_REF_DECL (x))
9397 mark_decl_referenced (SYMBOL_REF_DECL (x));
9400 if (MACHOPIC_INDIRECT
9401 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9402 name = machopic_indirection_name (x, /*stub_p=*/true);
9404 assemble_name (file, name);
9406 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9407 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9408 fputs ("@PLT", file);
9415 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9416 assemble_name (asm_out_file, buf);
9420 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9424 /* This used to output parentheses around the expression,
9425 but that does not work on the 386 (either ATT or BSD assembler). */
9426 output_pic_addr_const (file, XEXP (x, 0), code);
9430 if (GET_MODE (x) == VOIDmode)
9432 /* We can use %d if the number is <32 bits and positive. */
9433 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9434 fprintf (file, "0x%lx%08lx",
9435 (unsigned long) CONST_DOUBLE_HIGH (x),
9436 (unsigned long) CONST_DOUBLE_LOW (x));
9438 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9441 /* We can't handle floating point constants;
9442 PRINT_OPERAND must handle them. */
9443 output_operand_lossage ("floating constant misused");
9447 /* Some assemblers need integer constants to appear first. */
9448 if (CONST_INT_P (XEXP (x, 0)))
9450 output_pic_addr_const (file, XEXP (x, 0), code);
9452 output_pic_addr_const (file, XEXP (x, 1), code);
9456 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9457 output_pic_addr_const (file, XEXP (x, 1), code);
9459 output_pic_addr_const (file, XEXP (x, 0), code);
9465 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9466 output_pic_addr_const (file, XEXP (x, 0), code);
9468 output_pic_addr_const (file, XEXP (x, 1), code);
9470 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
9474 gcc_assert (XVECLEN (x, 0) == 1);
9475 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9476 switch (XINT (x, 1))
9479 fputs ("@GOT", file);
9482 fputs ("@GOTOFF", file);
9485 fputs ("@PLTOFF", file);
9487 case UNSPEC_GOTPCREL:
9488 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9489 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9491 case UNSPEC_GOTTPOFF:
9492 /* FIXME: This might be @TPOFF in Sun ld too. */
9493 fputs ("@GOTTPOFF", file);
9496 fputs ("@TPOFF", file);
9500 fputs ("@TPOFF", file);
9502 fputs ("@NTPOFF", file);
9505 fputs ("@DTPOFF", file);
9507 case UNSPEC_GOTNTPOFF:
9509 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9510 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9512 fputs ("@GOTNTPOFF", file);
9514 case UNSPEC_INDNTPOFF:
9515 fputs ("@INDNTPOFF", file);
9518 output_operand_lossage ("invalid UNSPEC as operand");
9524 output_operand_lossage ("invalid expression as operand");
9528 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9529 We need to emit DTP-relative relocations. */
9531 static void ATTRIBUTE_UNUSED
9532 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
9534 fputs (ASM_LONG, file);
9535 output_addr_const (file, x);
9536 fputs ("@DTPOFF", file);
9542 fputs (", 0", file);
9549 /* In the name of slightly smaller debug output, and to cater to
9550 general assembler lossage, recognize PIC+GOTOFF and turn it back
9551 into a direct symbol reference.
9553 On Darwin, this is necessary to avoid a crash, because Darwin
9554 has a different PIC label for each routine but the DWARF debugging
9555 information is not associated with any particular routine, so it's
9556 necessary to remove references to the PIC label from RTL stored by
9557 the DWARF output code. */
9560 ix86_delegitimize_address (rtx orig_x)
9563 /* reg_addend is NULL or a multiple of some register. */
9564 rtx reg_addend = NULL_RTX;
9565 /* const_addend is NULL or a const_int. */
9566 rtx const_addend = NULL_RTX;
9567 /* This is the result, or NULL. */
9568 rtx result = NULL_RTX;
9575 if (GET_CODE (x) != CONST
9576 || GET_CODE (XEXP (x, 0)) != UNSPEC
9577 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9580 return XVECEXP (XEXP (x, 0), 0, 0);
9583 if (GET_CODE (x) != PLUS
9584 || GET_CODE (XEXP (x, 1)) != CONST)
9587 if (REG_P (XEXP (x, 0))
9588 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
9589 /* %ebx + GOT/GOTOFF */
9591 else if (GET_CODE (XEXP (x, 0)) == PLUS)
9593 /* %ebx + %reg * scale + GOT/GOTOFF */
9594 reg_addend = XEXP (x, 0);
9595 if (REG_P (XEXP (reg_addend, 0))
9596 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
9597 reg_addend = XEXP (reg_addend, 1);
9598 else if (REG_P (XEXP (reg_addend, 1))
9599 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
9600 reg_addend = XEXP (reg_addend, 0);
9603 if (!REG_P (reg_addend)
9604 && GET_CODE (reg_addend) != MULT
9605 && GET_CODE (reg_addend) != ASHIFT)
9611 x = XEXP (XEXP (x, 1), 0);
9612 if (GET_CODE (x) == PLUS
9613 && CONST_INT_P (XEXP (x, 1)))
9615 const_addend = XEXP (x, 1);
9619 if (GET_CODE (x) == UNSPEC
9620 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
9621 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
9622 result = XVECEXP (x, 0, 0);
9624 if (TARGET_MACHO && darwin_local_data_pic (x)
9626 result = XEXP (x, 0);
9632 result = gen_rtx_PLUS (Pmode, result, const_addend);
9634 result = gen_rtx_PLUS (Pmode, reg_addend, result);
9638 /* If X is a machine specific address (i.e. a symbol or label being
9639 referenced as a displacement from the GOT implemented using an
9640 UNSPEC), then return the base term. Otherwise return X. */
9643 ix86_find_base_term (rtx x)
9649 if (GET_CODE (x) != CONST)
9652 if (GET_CODE (term) == PLUS
9653 && (CONST_INT_P (XEXP (term, 1))
9654 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
9655 term = XEXP (term, 0);
9656 if (GET_CODE (term) != UNSPEC
9657 || XINT (term, 1) != UNSPEC_GOTPCREL)
9660 term = XVECEXP (term, 0, 0);
9662 if (GET_CODE (term) != SYMBOL_REF
9663 && GET_CODE (term) != LABEL_REF)
9669 term = ix86_delegitimize_address (x);
9671 if (GET_CODE (term) != SYMBOL_REF
9672 && GET_CODE (term) != LABEL_REF)
9679 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
9684 if (mode == CCFPmode || mode == CCFPUmode)
9686 enum rtx_code second_code, bypass_code;
9687 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
9688 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
9689 code = ix86_fp_compare_code_to_integer (code);
9693 code = reverse_condition (code);
9744 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
9748 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
9749 Those same assemblers have the same but opposite lossage on cmov. */
9751 suffix = fp ? "nbe" : "a";
9752 else if (mode == CCCmode)
9775 gcc_assert (mode == CCmode || mode == CCCmode);
9797 gcc_assert (mode == CCmode || mode == CCCmode);
9798 suffix = fp ? "nb" : "ae";
9801 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
9808 else if (mode == CCCmode)
9809 suffix = fp ? "nb" : "ae";
9814 suffix = fp ? "u" : "p";
9817 suffix = fp ? "nu" : "np";
9822 fputs (suffix, file);
9825 /* Print the name of register X to FILE based on its machine mode and number.
9826 If CODE is 'w', pretend the mode is HImode.
9827 If CODE is 'b', pretend the mode is QImode.
9828 If CODE is 'k', pretend the mode is SImode.
9829 If CODE is 'q', pretend the mode is DImode.
9830 If CODE is 'h', pretend the reg is the 'high' byte register.
9831 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
9834 print_reg (rtx x, int code, FILE *file)
9836 gcc_assert (x == pc_rtx
9837 || (REGNO (x) != ARG_POINTER_REGNUM
9838 && REGNO (x) != FRAME_POINTER_REGNUM
9839 && REGNO (x) != FLAGS_REG
9840 && REGNO (x) != FPSR_REG
9841 && REGNO (x) != FPCR_REG));
9843 if (ASSEMBLER_DIALECT == ASM_ATT)
9848 gcc_assert (TARGET_64BIT);
9849 fputs ("rip", file);
9853 if (code == 'w' || MMX_REG_P (x))
9855 else if (code == 'b')
9857 else if (code == 'k')
9859 else if (code == 'q')
9861 else if (code == 'y')
9863 else if (code == 'h')
9866 code = GET_MODE_SIZE (GET_MODE (x));
9868 /* Irritatingly, AMD extended registers use different naming convention
9869 from the normal registers. */
9870 if (REX_INT_REG_P (x))
9872 gcc_assert (TARGET_64BIT);
9876 error ("extended registers have no high halves");
9879 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
9882 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
9885 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
9888 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
9891 error ("unsupported operand size for extended register");
9899 if (STACK_TOP_P (x))
9901 fputs ("st(0)", file);
9908 if (! ANY_FP_REG_P (x))
9909 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
9914 fputs (hi_reg_name[REGNO (x)], file);
9917 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
9919 fputs (qi_reg_name[REGNO (x)], file);
9922 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
9924 fputs (qi_high_reg_name[REGNO (x)], file);
9931 /* Locate some local-dynamic symbol still in use by this function
9932 so that we can print its name in some tls_local_dynamic_base
9936 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9940 if (GET_CODE (x) == SYMBOL_REF
9941 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9943 cfun->machine->some_ld_name = XSTR (x, 0);
9951 get_some_local_dynamic_name (void)
9955 if (cfun->machine->some_ld_name)
9956 return cfun->machine->some_ld_name;
9958 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9960 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9961 return cfun->machine->some_ld_name;
9967 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9968 C -- print opcode suffix for set/cmov insn.
9969 c -- like C, but print reversed condition
9970 E,e -- likewise, but for compare-and-branch fused insn.
9971 F,f -- likewise, but for floating-point.
9972 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9974 R -- print the prefix for register names.
9975 z -- print the opcode suffix for the size of the current operand.
9976 * -- print a star (in certain assembler syntax)
9977 A -- print an absolute memory reference.
9978 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9979 s -- print a shift double count, followed by the assemblers argument
9981 b -- print the QImode name of the register for the indicated operand.
9982 %b0 would print %al if operands[0] is reg 0.
9983 w -- likewise, print the HImode name of the register.
9984 k -- likewise, print the SImode name of the register.
9985 q -- likewise, print the DImode name of the register.
9986 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9987 y -- print "st(0)" instead of "st" as a register.
9988 D -- print condition for SSE cmp instruction.
9989 P -- if PIC, print an @PLT suffix.
9990 X -- don't print any sort of PIC '@' suffix for a symbol.
9991 & -- print some in-use local-dynamic symbol name.
9992 H -- print a memory address offset by 8; used for sse high-parts
9993 Y -- print condition for SSE5 com* instruction.
9994 + -- print a branch hint as 'cs' or 'ds' prefix
9995 ; -- print a semicolon (after prefixes due to bug in older gas).
9999 print_operand (FILE *file, rtx x, int code)
10006 if (ASSEMBLER_DIALECT == ASM_ATT)
10011 assemble_name (file, get_some_local_dynamic_name ());
10015 switch (ASSEMBLER_DIALECT)
10022 /* Intel syntax. For absolute addresses, registers should not
10023 be surrounded by braces. */
10027 PRINT_OPERAND (file, x, 0);
10034 gcc_unreachable ();
10037 PRINT_OPERAND (file, x, 0);
10042 if (ASSEMBLER_DIALECT == ASM_ATT)
10047 if (ASSEMBLER_DIALECT == ASM_ATT)
10052 if (ASSEMBLER_DIALECT == ASM_ATT)
10057 if (ASSEMBLER_DIALECT == ASM_ATT)
10062 if (ASSEMBLER_DIALECT == ASM_ATT)
10067 if (ASSEMBLER_DIALECT == ASM_ATT)
10072 /* 387 opcodes don't get size suffixes if the operands are
10074 if (STACK_REG_P (x))
10077 /* Likewise if using Intel opcodes. */
10078 if (ASSEMBLER_DIALECT == ASM_INTEL)
10081 /* This is the size of op from size of operand. */
10082 switch (GET_MODE_SIZE (GET_MODE (x)))
10091 #ifdef HAVE_GAS_FILDS_FISTS
10101 if (GET_MODE (x) == SFmode)
10116 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10120 #ifdef GAS_MNEMONICS
10135 gcc_unreachable ();
10149 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10151 PRINT_OPERAND (file, x, 0);
10152 fputs (", ", file);
10157 /* Little bit of braindamage here. The SSE compare instructions
10158 does use completely different names for the comparisons that the
10159 fp conditional moves. */
10160 switch (GET_CODE (x))
10164 fputs ("eq", file);
10168 fputs ("lt", file);
10172 fputs ("le", file);
10175 fputs ("unord", file);
10179 fputs ("neq", file);
10183 fputs ("nlt", file);
10187 fputs ("nle", file);
10190 fputs ("ord", file);
10193 gcc_unreachable ();
10197 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10198 if (ASSEMBLER_DIALECT == ASM_ATT)
10200 switch (GET_MODE (x))
10202 case HImode: putc ('w', file); break;
10204 case SFmode: putc ('l', file); break;
10206 case DFmode: putc ('q', file); break;
10207 default: gcc_unreachable ();
10214 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10217 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10218 if (ASSEMBLER_DIALECT == ASM_ATT)
10221 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10224 /* Like above, but reverse condition */
10226 /* Check to see if argument to %c is really a constant
10227 and not a condition code which needs to be reversed. */
10228 if (!COMPARISON_P (x))
10230 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10233 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10236 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10237 if (ASSEMBLER_DIALECT == ASM_ATT)
10240 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
10244 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10248 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10252 /* It doesn't actually matter what mode we use here, as we're
10253 only going to use this for printing. */
10254 x = adjust_address_nv (x, DImode, 8);
10261 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
10264 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10267 int pred_val = INTVAL (XEXP (x, 0));
10269 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10270 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10272 int taken = pred_val > REG_BR_PROB_BASE / 2;
10273 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10275 /* Emit hints only in the case default branch prediction
10276 heuristics would fail. */
10277 if (taken != cputaken)
10279 /* We use 3e (DS) prefix for taken branches and
10280 2e (CS) prefix for not taken branches. */
10282 fputs ("ds ; ", file);
10284 fputs ("cs ; ", file);
10292 switch (GET_CODE (x))
10295 fputs ("neq", file);
10298 fputs ("eq", file);
10302 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10306 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10310 fputs ("le", file);
10314 fputs ("lt", file);
10317 fputs ("unord", file);
10320 fputs ("ord", file);
10323 fputs ("ueq", file);
10326 fputs ("nlt", file);
10329 fputs ("nle", file);
10332 fputs ("ule", file);
10335 fputs ("ult", file);
10338 fputs ("une", file);
10341 gcc_unreachable ();
10347 fputs (" ; ", file);
10354 output_operand_lossage ("invalid operand code '%c'", code);
10359 print_reg (x, code, file);
10361 else if (MEM_P (x))
10363 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10364 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10365 && GET_MODE (x) != BLKmode)
10368 switch (GET_MODE_SIZE (GET_MODE (x)))
10370 case 1: size = "BYTE"; break;
10371 case 2: size = "WORD"; break;
10372 case 4: size = "DWORD"; break;
10373 case 8: size = "QWORD"; break;
10374 case 12: size = "XWORD"; break;
10376 if (GET_MODE (x) == XFmode)
10382 gcc_unreachable ();
10385 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10388 else if (code == 'w')
10390 else if (code == 'k')
10393 fputs (size, file);
10394 fputs (" PTR ", file);
10398 /* Avoid (%rip) for call operands. */
10399 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10400 && !CONST_INT_P (x))
10401 output_addr_const (file, x);
10402 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10403 output_operand_lossage ("invalid constraints for operand");
10405 output_address (x);
10408 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10413 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10414 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10416 if (ASSEMBLER_DIALECT == ASM_ATT)
10418 fprintf (file, "0x%08lx", (long unsigned int) l);
10421 /* These float cases don't actually occur as immediate operands. */
10422 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10426 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10427 fprintf (file, "%s", dstr);
10430 else if (GET_CODE (x) == CONST_DOUBLE
10431 && GET_MODE (x) == XFmode)
10435 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10436 fprintf (file, "%s", dstr);
10441 /* We have patterns that allow zero sets of memory, for instance.
10442 In 64-bit mode, we should probably support all 8-byte vectors,
10443 since we can in fact encode that into an immediate. */
10444 if (GET_CODE (x) == CONST_VECTOR)
10446 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
10452 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10454 if (ASSEMBLER_DIALECT == ASM_ATT)
10457 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10458 || GET_CODE (x) == LABEL_REF)
10460 if (ASSEMBLER_DIALECT == ASM_ATT)
10463 fputs ("OFFSET FLAT:", file);
10466 if (CONST_INT_P (x))
10467 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10469 output_pic_addr_const (file, x, code);
10471 output_addr_const (file, x);
10475 /* Print a memory operand whose address is ADDR. */
10478 print_operand_address (FILE *file, rtx addr)
10480 struct ix86_address parts;
10481 rtx base, index, disp;
10483 int ok = ix86_decompose_address (addr, &parts);
10488 index = parts.index;
10490 scale = parts.scale;
10498 if (ASSEMBLER_DIALECT == ASM_ATT)
10500 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
10503 gcc_unreachable ();
10506 /* Use one byte shorter RIP relative addressing for 64bit mode. */
10507 if (TARGET_64BIT && !base && !index)
10511 if (GET_CODE (disp) == CONST
10512 && GET_CODE (XEXP (disp, 0)) == PLUS
10513 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10514 symbol = XEXP (XEXP (disp, 0), 0);
10516 if (GET_CODE (symbol) == LABEL_REF
10517 || (GET_CODE (symbol) == SYMBOL_REF
10518 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
10521 if (!base && !index)
10523 /* Displacement only requires special attention. */
10525 if (CONST_INT_P (disp))
10527 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
10528 fputs ("ds:", file);
10529 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
10532 output_pic_addr_const (file, disp, 0);
10534 output_addr_const (file, disp);
10538 if (ASSEMBLER_DIALECT == ASM_ATT)
10543 output_pic_addr_const (file, disp, 0);
10544 else if (GET_CODE (disp) == LABEL_REF)
10545 output_asm_label (disp);
10547 output_addr_const (file, disp);
10552 print_reg (base, 0, file);
10556 print_reg (index, 0, file);
10558 fprintf (file, ",%d", scale);
10564 rtx offset = NULL_RTX;
10568 /* Pull out the offset of a symbol; print any symbol itself. */
10569 if (GET_CODE (disp) == CONST
10570 && GET_CODE (XEXP (disp, 0)) == PLUS
10571 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10573 offset = XEXP (XEXP (disp, 0), 1);
10574 disp = gen_rtx_CONST (VOIDmode,
10575 XEXP (XEXP (disp, 0), 0));
10579 output_pic_addr_const (file, disp, 0);
10580 else if (GET_CODE (disp) == LABEL_REF)
10581 output_asm_label (disp);
10582 else if (CONST_INT_P (disp))
10585 output_addr_const (file, disp);
10591 print_reg (base, 0, file);
10594 if (INTVAL (offset) >= 0)
10596 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
10607 print_reg (index, 0, file);
10609 fprintf (file, "*%d", scale);
10617 output_addr_const_extra (FILE *file, rtx x)
10621 if (GET_CODE (x) != UNSPEC)
10624 op = XVECEXP (x, 0, 0);
10625 switch (XINT (x, 1))
10627 case UNSPEC_GOTTPOFF:
10628 output_addr_const (file, op);
10629 /* FIXME: This might be @TPOFF in Sun ld. */
10630 fputs ("@GOTTPOFF", file);
10633 output_addr_const (file, op);
10634 fputs ("@TPOFF", file);
10636 case UNSPEC_NTPOFF:
10637 output_addr_const (file, op);
10639 fputs ("@TPOFF", file);
10641 fputs ("@NTPOFF", file);
10643 case UNSPEC_DTPOFF:
10644 output_addr_const (file, op);
10645 fputs ("@DTPOFF", file);
10647 case UNSPEC_GOTNTPOFF:
10648 output_addr_const (file, op);
10650 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10651 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
10653 fputs ("@GOTNTPOFF", file);
10655 case UNSPEC_INDNTPOFF:
10656 output_addr_const (file, op);
10657 fputs ("@INDNTPOFF", file);
10667 /* Split one or more DImode RTL references into pairs of SImode
10668 references. The RTL can be REG, offsettable MEM, integer constant, or
10669 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
10670 split and "num" is its length. lo_half and hi_half are output arrays
10671 that parallel "operands". */
10674 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10678 rtx op = operands[num];
10680 /* simplify_subreg refuse to split volatile memory addresses,
10681 but we still have to handle it. */
10684 lo_half[num] = adjust_address (op, SImode, 0);
10685 hi_half[num] = adjust_address (op, SImode, 4);
10689 lo_half[num] = simplify_gen_subreg (SImode, op,
10690 GET_MODE (op) == VOIDmode
10691 ? DImode : GET_MODE (op), 0);
10692 hi_half[num] = simplify_gen_subreg (SImode, op,
10693 GET_MODE (op) == VOIDmode
10694 ? DImode : GET_MODE (op), 4);
10698 /* Split one or more TImode RTL references into pairs of DImode
10699 references. The RTL can be REG, offsettable MEM, integer constant, or
10700 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
10701 split and "num" is its length. lo_half and hi_half are output arrays
10702 that parallel "operands". */
10705 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
10709 rtx op = operands[num];
10711 /* simplify_subreg refuse to split volatile memory addresses, but we
10712 still have to handle it. */
10715 lo_half[num] = adjust_address (op, DImode, 0);
10716 hi_half[num] = adjust_address (op, DImode, 8);
10720 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
10721 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
10726 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
10727 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
10728 is the expression of the binary operation. The output may either be
10729 emitted here, or returned to the caller, like all output_* functions.
10731 There is no guarantee that the operands are the same mode, as they
10732 might be within FLOAT or FLOAT_EXTEND expressions. */
10734 #ifndef SYSV386_COMPAT
10735 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
10736 wants to fix the assemblers because that causes incompatibility
10737 with gcc. No-one wants to fix gcc because that causes
10738 incompatibility with assemblers... You can use the option of
10739 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
10740 #define SYSV386_COMPAT 1
10744 output_387_binary_op (rtx insn, rtx *operands)
10746 static char buf[30];
10749 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
10751 #ifdef ENABLE_CHECKING
10752 /* Even if we do not want to check the inputs, this documents input
10753 constraints. Which helps in understanding the following code. */
10754 if (STACK_REG_P (operands[0])
10755 && ((REG_P (operands[1])
10756 && REGNO (operands[0]) == REGNO (operands[1])
10757 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
10758 || (REG_P (operands[2])
10759 && REGNO (operands[0]) == REGNO (operands[2])
10760 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
10761 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
10764 gcc_assert (is_sse);
10767 switch (GET_CODE (operands[3]))
10770 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10771 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10779 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10780 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10788 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10789 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10797 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
10798 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
10806 gcc_unreachable ();
10811 strcpy (buf, ssep);
10812 if (GET_MODE (operands[0]) == SFmode)
10813 strcat (buf, "ss\t{%2, %0|%0, %2}");
10815 strcat (buf, "sd\t{%2, %0|%0, %2}");
10820 switch (GET_CODE (operands[3]))
10824 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
10826 rtx temp = operands[2];
10827 operands[2] = operands[1];
10828 operands[1] = temp;
10831 /* know operands[0] == operands[1]. */
10833 if (MEM_P (operands[2]))
10839 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
10841 if (STACK_TOP_P (operands[0]))
10842 /* How is it that we are storing to a dead operand[2]?
10843 Well, presumably operands[1] is dead too. We can't
10844 store the result to st(0) as st(0) gets popped on this
10845 instruction. Instead store to operands[2] (which I
10846 think has to be st(1)). st(1) will be popped later.
10847 gcc <= 2.8.1 didn't have this check and generated
10848 assembly code that the Unixware assembler rejected. */
10849 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
10851 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
10855 if (STACK_TOP_P (operands[0]))
10856 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
10858 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10863 if (MEM_P (operands[1]))
10869 if (MEM_P (operands[2]))
10875 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
10878 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
10879 derived assemblers, confusingly reverse the direction of
10880 the operation for fsub{r} and fdiv{r} when the
10881 destination register is not st(0). The Intel assembler
10882 doesn't have this brain damage. Read !SYSV386_COMPAT to
10883 figure out what the hardware really does. */
10884 if (STACK_TOP_P (operands[0]))
10885 p = "{p\t%0, %2|rp\t%2, %0}";
10887 p = "{rp\t%2, %0|p\t%0, %2}";
10889 if (STACK_TOP_P (operands[0]))
10890 /* As above for fmul/fadd, we can't store to st(0). */
10891 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
10893 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
10898 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
10901 if (STACK_TOP_P (operands[0]))
10902 p = "{rp\t%0, %1|p\t%1, %0}";
10904 p = "{p\t%1, %0|rp\t%0, %1}";
10906 if (STACK_TOP_P (operands[0]))
10907 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
10909 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
10914 if (STACK_TOP_P (operands[0]))
10916 if (STACK_TOP_P (operands[1]))
10917 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
10919 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
10922 else if (STACK_TOP_P (operands[1]))
10925 p = "{\t%1, %0|r\t%0, %1}";
10927 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
10933 p = "{r\t%2, %0|\t%0, %2}";
10935 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10941 gcc_unreachable ();
10948 /* Return needed mode for entity in optimize_mode_switching pass. */
10951 ix86_mode_needed (int entity, rtx insn)
10953 enum attr_i387_cw mode;
10955 /* The mode UNINITIALIZED is used to store control word after a
10956 function call or ASM pattern. The mode ANY specify that function
10957 has no requirements on the control word and make no changes in the
10958 bits we are interested in. */
10961 || (NONJUMP_INSN_P (insn)
10962 && (asm_noperands (PATTERN (insn)) >= 0
10963 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10964 return I387_CW_UNINITIALIZED;
10966 if (recog_memoized (insn) < 0)
10967 return I387_CW_ANY;
10969 mode = get_attr_i387_cw (insn);
10974 if (mode == I387_CW_TRUNC)
10979 if (mode == I387_CW_FLOOR)
10984 if (mode == I387_CW_CEIL)
10989 if (mode == I387_CW_MASK_PM)
10994 gcc_unreachable ();
10997 return I387_CW_ANY;
11000 /* Output code to initialize control word copies used by trunc?f?i and
11001 rounding patterns. CURRENT_MODE is set to current control word,
11002 while NEW_MODE is set to new control word. */
11005 emit_i387_cw_initialization (int mode)
11007 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11010 enum ix86_stack_slot slot;
11012 rtx reg = gen_reg_rtx (HImode);
11014 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11015 emit_move_insn (reg, copy_rtx (stored_mode));
11017 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
11021 case I387_CW_TRUNC:
11022 /* round toward zero (truncate) */
11023 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11024 slot = SLOT_CW_TRUNC;
11027 case I387_CW_FLOOR:
11028 /* round down toward -oo */
11029 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11030 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11031 slot = SLOT_CW_FLOOR;
11035 /* round up toward +oo */
11036 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11037 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11038 slot = SLOT_CW_CEIL;
11041 case I387_CW_MASK_PM:
11042 /* mask precision exception for nearbyint() */
11043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11044 slot = SLOT_CW_MASK_PM;
11048 gcc_unreachable ();
11055 case I387_CW_TRUNC:
11056 /* round toward zero (truncate) */
11057 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11058 slot = SLOT_CW_TRUNC;
11061 case I387_CW_FLOOR:
11062 /* round down toward -oo */
11063 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11064 slot = SLOT_CW_FLOOR;
11068 /* round up toward +oo */
11069 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11070 slot = SLOT_CW_CEIL;
11073 case I387_CW_MASK_PM:
11074 /* mask precision exception for nearbyint() */
11075 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11076 slot = SLOT_CW_MASK_PM;
11080 gcc_unreachable ();
11084 gcc_assert (slot < MAX_386_STACK_LOCALS);
11086 new_mode = assign_386_stack_local (HImode, slot);
11087 emit_move_insn (new_mode, reg);
11090 /* Output code for INSN to convert a float to a signed int. OPERANDS
11091 are the insn operands. The output may be [HSD]Imode and the input
11092 operand may be [SDX]Fmode. */
11095 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11097 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11098 int dimode_p = GET_MODE (operands[0]) == DImode;
11099 int round_mode = get_attr_i387_cw (insn);
11101 /* Jump through a hoop or two for DImode, since the hardware has no
11102 non-popping instruction. We used to do this a different way, but
11103 that was somewhat fragile and broke with post-reload splitters. */
11104 if ((dimode_p || fisttp) && !stack_top_dies)
11105 output_asm_insn ("fld\t%y1", operands);
11107 gcc_assert (STACK_TOP_P (operands[1]));
11108 gcc_assert (MEM_P (operands[0]));
11109 gcc_assert (GET_MODE (operands[1]) != TFmode);
11112 output_asm_insn ("fisttp%z0\t%0", operands);
11115 if (round_mode != I387_CW_ANY)
11116 output_asm_insn ("fldcw\t%3", operands);
11117 if (stack_top_dies || dimode_p)
11118 output_asm_insn ("fistp%z0\t%0", operands);
11120 output_asm_insn ("fist%z0\t%0", operands);
11121 if (round_mode != I387_CW_ANY)
11122 output_asm_insn ("fldcw\t%2", operands);
11128 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11129 have the values zero or one, indicates the ffreep insn's operand
11130 from the OPERANDS array. */
11132 static const char *
11133 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11135 if (TARGET_USE_FFREEP)
11136 #if HAVE_AS_IX86_FFREEP
11137 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11140 static char retval[] = ".word\t0xc_df";
11141 int regno = REGNO (operands[opno]);
11143 gcc_assert (FP_REGNO_P (regno));
11145 retval[9] = '0' + (regno - FIRST_STACK_REG);
11150 return opno ? "fstp\t%y1" : "fstp\t%y0";
11154 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11155 should be used. UNORDERED_P is true when fucom should be used. */
11158 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11160 int stack_top_dies;
11161 rtx cmp_op0, cmp_op1;
11162 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11166 cmp_op0 = operands[0];
11167 cmp_op1 = operands[1];
11171 cmp_op0 = operands[1];
11172 cmp_op1 = operands[2];
11177 if (GET_MODE (operands[0]) == SFmode)
11179 return "ucomiss\t{%1, %0|%0, %1}";
11181 return "comiss\t{%1, %0|%0, %1}";
11184 return "ucomisd\t{%1, %0|%0, %1}";
11186 return "comisd\t{%1, %0|%0, %1}";
11189 gcc_assert (STACK_TOP_P (cmp_op0));
11191 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11193 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11195 if (stack_top_dies)
11197 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11198 return output_387_ffreep (operands, 1);
11201 return "ftst\n\tfnstsw\t%0";
11204 if (STACK_REG_P (cmp_op1)
11206 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11207 && REGNO (cmp_op1) != FIRST_STACK_REG)
11209 /* If both the top of the 387 stack dies, and the other operand
11210 is also a stack register that dies, then this must be a
11211 `fcompp' float compare */
11215 /* There is no double popping fcomi variant. Fortunately,
11216 eflags is immune from the fstp's cc clobbering. */
11218 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11220 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11221 return output_387_ffreep (operands, 0);
11226 return "fucompp\n\tfnstsw\t%0";
11228 return "fcompp\n\tfnstsw\t%0";
11233 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11235 static const char * const alt[16] =
11237 "fcom%z2\t%y2\n\tfnstsw\t%0",
11238 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11239 "fucom%z2\t%y2\n\tfnstsw\t%0",
11240 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11242 "ficom%z2\t%y2\n\tfnstsw\t%0",
11243 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11247 "fcomi\t{%y1, %0|%0, %y1}",
11248 "fcomip\t{%y1, %0|%0, %y1}",
11249 "fucomi\t{%y1, %0|%0, %y1}",
11250 "fucomip\t{%y1, %0|%0, %y1}",
11261 mask = eflags_p << 3;
11262 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11263 mask |= unordered_p << 1;
11264 mask |= stack_top_dies;
11266 gcc_assert (mask < 16);
11275 ix86_output_addr_vec_elt (FILE *file, int value)
11277 const char *directive = ASM_LONG;
11281 directive = ASM_QUAD;
11283 gcc_assert (!TARGET_64BIT);
11286 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
11290 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11292 const char *directive = ASM_LONG;
11295 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11296 directive = ASM_QUAD;
11298 gcc_assert (!TARGET_64BIT);
11300 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11301 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11302 fprintf (file, "%s%s%d-%s%d\n",
11303 directive, LPREFIX, value, LPREFIX, rel);
11304 else if (HAVE_AS_GOTOFF_IN_DATA)
11305 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11307 else if (TARGET_MACHO)
11309 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11310 machopic_output_function_base_name (file);
11311 fprintf(file, "\n");
11315 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11316 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11319 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
11323 ix86_expand_clear (rtx dest)
11327 /* We play register width games, which are only valid after reload. */
11328 gcc_assert (reload_completed);
11330 /* Avoid HImode and its attendant prefix byte. */
11331 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11332 dest = gen_rtx_REG (SImode, REGNO (dest));
11333 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11335 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11336 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
11338 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11339 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11345 /* X is an unchanging MEM. If it is a constant pool reference, return
11346 the constant pool rtx, else NULL. */
11349 maybe_get_pool_constant (rtx x)
11351 x = ix86_delegitimize_address (XEXP (x, 0));
11353 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11354 return get_pool_constant (x);
11360 ix86_expand_move (enum machine_mode mode, rtx operands[])
11363 enum tls_model model;
11368 if (GET_CODE (op1) == SYMBOL_REF)
11370 model = SYMBOL_REF_TLS_MODEL (op1);
11373 op1 = legitimize_tls_address (op1, model, true);
11374 op1 = force_operand (op1, op0);
11378 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11379 && SYMBOL_REF_DLLIMPORT_P (op1))
11380 op1 = legitimize_dllimport_symbol (op1, false);
11382 else if (GET_CODE (op1) == CONST
11383 && GET_CODE (XEXP (op1, 0)) == PLUS
11384 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11386 rtx addend = XEXP (XEXP (op1, 0), 1);
11387 rtx symbol = XEXP (XEXP (op1, 0), 0);
11390 model = SYMBOL_REF_TLS_MODEL (symbol);
11392 tmp = legitimize_tls_address (symbol, model, true);
11393 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11394 && SYMBOL_REF_DLLIMPORT_P (symbol))
11395 tmp = legitimize_dllimport_symbol (symbol, true);
11399 tmp = force_operand (tmp, NULL);
11400 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11401 op0, 1, OPTAB_DIRECT);
11407 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11409 if (TARGET_MACHO && !TARGET_64BIT)
11414 rtx temp = ((reload_in_progress
11415 || ((op0 && REG_P (op0))
11417 ? op0 : gen_reg_rtx (Pmode));
11418 op1 = machopic_indirect_data_reference (op1, temp);
11419 op1 = machopic_legitimize_pic_address (op1, mode,
11420 temp == op1 ? 0 : temp);
11422 else if (MACHOPIC_INDIRECT)
11423 op1 = machopic_indirect_data_reference (op1, 0);
11431 op1 = force_reg (Pmode, op1);
11432 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11434 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11435 op1 = legitimize_pic_address (op1, reg);
11444 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11445 || !push_operand (op0, mode))
11447 op1 = force_reg (mode, op1);
11449 if (push_operand (op0, mode)
11450 && ! general_no_elim_operand (op1, mode))
11451 op1 = copy_to_mode_reg (mode, op1);
11453 /* Force large constants in 64bit compilation into register
11454 to get them CSEed. */
11455 if (can_create_pseudo_p ()
11456 && (mode == DImode) && TARGET_64BIT
11457 && immediate_operand (op1, mode)
11458 && !x86_64_zext_immediate_operand (op1, VOIDmode)
11459 && !register_operand (op0, mode)
11461 op1 = copy_to_mode_reg (mode, op1);
11463 if (can_create_pseudo_p ()
11464 && FLOAT_MODE_P (mode)
11465 && GET_CODE (op1) == CONST_DOUBLE)
11467 /* If we are loading a floating point constant to a register,
11468 force the value to memory now, since we'll get better code
11469 out the back end. */
11471 op1 = validize_mem (force_const_mem (mode, op1));
11472 if (!register_operand (op0, mode))
11474 rtx temp = gen_reg_rtx (mode);
11475 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
11476 emit_move_insn (op0, temp);
11482 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11486 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
11488 rtx op0 = operands[0], op1 = operands[1];
11489 unsigned int align = GET_MODE_ALIGNMENT (mode);
11491 /* Force constants other than zero into memory. We do not know how
11492 the instructions used to build constants modify the upper 64 bits
11493 of the register, once we have that information we may be able
11494 to handle some of them more efficiently. */
11495 if (can_create_pseudo_p ()
11496 && register_operand (op0, mode)
11497 && (CONSTANT_P (op1)
11498 || (GET_CODE (op1) == SUBREG
11499 && CONSTANT_P (SUBREG_REG (op1))))
11500 && standard_sse_constant_p (op1) <= 0)
11501 op1 = validize_mem (force_const_mem (mode, op1));
11503 /* We need to check memory alignment for SSE mode since attribute
11504 can make operands unaligned. */
11505 if (can_create_pseudo_p ()
11506 && SSE_REG_MODE_P (mode)
11507 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
11508 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
11512 /* ix86_expand_vector_move_misalign() does not like constants ... */
11513 if (CONSTANT_P (op1)
11514 || (GET_CODE (op1) == SUBREG
11515 && CONSTANT_P (SUBREG_REG (op1))))
11516 op1 = validize_mem (force_const_mem (mode, op1));
11518 /* ... nor both arguments in memory. */
11519 if (!register_operand (op0, mode)
11520 && !register_operand (op1, mode))
11521 op1 = force_reg (mode, op1);
11523 tmp[0] = op0; tmp[1] = op1;
11524 ix86_expand_vector_move_misalign (mode, tmp);
11528 /* Make operand1 a register if it isn't already. */
11529 if (can_create_pseudo_p ()
11530 && !register_operand (op0, mode)
11531 && !register_operand (op1, mode))
11533 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
11537 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
11540 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
11541 straight to ix86_expand_vector_move. */
11542 /* Code generation for scalar reg-reg moves of single and double precision data:
11543 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
11547 if (x86_sse_partial_reg_dependency == true)
11552 Code generation for scalar loads of double precision data:
11553 if (x86_sse_split_regs == true)
11554 movlpd mem, reg (gas syntax)
11558 Code generation for unaligned packed loads of single precision data
11559 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
11560 if (x86_sse_unaligned_move_optimal)
11563 if (x86_sse_partial_reg_dependency == true)
11575 Code generation for unaligned packed loads of double precision data
11576 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
11577 if (x86_sse_unaligned_move_optimal)
11580 if (x86_sse_split_regs == true)
11593 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
11602 /* If we're optimizing for size, movups is the smallest. */
11605 op0 = gen_lowpart (V4SFmode, op0);
11606 op1 = gen_lowpart (V4SFmode, op1);
11607 emit_insn (gen_sse_movups (op0, op1));
11611 /* ??? If we have typed data, then it would appear that using
11612 movdqu is the only way to get unaligned data loaded with
11614 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11616 op0 = gen_lowpart (V16QImode, op0);
11617 op1 = gen_lowpart (V16QImode, op1);
11618 emit_insn (gen_sse2_movdqu (op0, op1));
11622 if (TARGET_SSE2 && mode == V2DFmode)
11626 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11628 op0 = gen_lowpart (V2DFmode, op0);
11629 op1 = gen_lowpart (V2DFmode, op1);
11630 emit_insn (gen_sse2_movupd (op0, op1));
11634 /* When SSE registers are split into halves, we can avoid
11635 writing to the top half twice. */
11636 if (TARGET_SSE_SPLIT_REGS)
11638 emit_clobber (op0);
11643 /* ??? Not sure about the best option for the Intel chips.
11644 The following would seem to satisfy; the register is
11645 entirely cleared, breaking the dependency chain. We
11646 then store to the upper half, with a dependency depth
11647 of one. A rumor has it that Intel recommends two movsd
11648 followed by an unpacklpd, but this is unconfirmed. And
11649 given that the dependency depth of the unpacklpd would
11650 still be one, I'm not sure why this would be better. */
11651 zero = CONST0_RTX (V2DFmode);
11654 m = adjust_address (op1, DFmode, 0);
11655 emit_insn (gen_sse2_loadlpd (op0, zero, m));
11656 m = adjust_address (op1, DFmode, 8);
11657 emit_insn (gen_sse2_loadhpd (op0, op0, m));
11661 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
11663 op0 = gen_lowpart (V4SFmode, op0);
11664 op1 = gen_lowpart (V4SFmode, op1);
11665 emit_insn (gen_sse_movups (op0, op1));
11669 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
11670 emit_move_insn (op0, CONST0_RTX (mode));
11672 emit_clobber (op0);
11674 if (mode != V4SFmode)
11675 op0 = gen_lowpart (V4SFmode, op0);
11676 m = adjust_address (op1, V2SFmode, 0);
11677 emit_insn (gen_sse_loadlps (op0, op0, m));
11678 m = adjust_address (op1, V2SFmode, 8);
11679 emit_insn (gen_sse_loadhps (op0, op0, m));
11682 else if (MEM_P (op0))
11684 /* If we're optimizing for size, movups is the smallest. */
11687 op0 = gen_lowpart (V4SFmode, op0);
11688 op1 = gen_lowpart (V4SFmode, op1);
11689 emit_insn (gen_sse_movups (op0, op1));
11693 /* ??? Similar to above, only less clear because of quote
11694 typeless stores unquote. */
11695 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
11696 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
11698 op0 = gen_lowpart (V16QImode, op0);
11699 op1 = gen_lowpart (V16QImode, op1);
11700 emit_insn (gen_sse2_movdqu (op0, op1));
11704 if (TARGET_SSE2 && mode == V2DFmode)
11706 m = adjust_address (op0, DFmode, 0);
11707 emit_insn (gen_sse2_storelpd (m, op1));
11708 m = adjust_address (op0, DFmode, 8);
11709 emit_insn (gen_sse2_storehpd (m, op1));
11713 if (mode != V4SFmode)
11714 op1 = gen_lowpart (V4SFmode, op1);
11715 m = adjust_address (op0, V2SFmode, 0);
11716 emit_insn (gen_sse_storelps (m, op1));
11717 m = adjust_address (op0, V2SFmode, 8);
11718 emit_insn (gen_sse_storehps (m, op1));
11722 gcc_unreachable ();
11725 /* Expand a push in MODE. This is some mode for which we do not support
11726 proper push instructions, at least from the registers that we expect
11727 the value to live in. */
11730 ix86_expand_push (enum machine_mode mode, rtx x)
11734 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
11735 GEN_INT (-GET_MODE_SIZE (mode)),
11736 stack_pointer_rtx, 1, OPTAB_DIRECT);
11737 if (tmp != stack_pointer_rtx)
11738 emit_move_insn (stack_pointer_rtx, tmp);
11740 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
11741 emit_move_insn (tmp, x);
11744 /* Helper function of ix86_fixup_binary_operands to canonicalize
11745 operand order. Returns true if the operands should be swapped. */
11748 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
11751 rtx dst = operands[0];
11752 rtx src1 = operands[1];
11753 rtx src2 = operands[2];
11755 /* If the operation is not commutative, we can't do anything. */
11756 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
11759 /* Highest priority is that src1 should match dst. */
11760 if (rtx_equal_p (dst, src1))
11762 if (rtx_equal_p (dst, src2))
11765 /* Next highest priority is that immediate constants come second. */
11766 if (immediate_operand (src2, mode))
11768 if (immediate_operand (src1, mode))
11771 /* Lowest priority is that memory references should come second. */
11781 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
11782 destination to use for the operation. If different from the true
11783 destination in operands[0], a copy operation will be required. */
11786 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
11789 rtx dst = operands[0];
11790 rtx src1 = operands[1];
11791 rtx src2 = operands[2];
11793 /* Canonicalize operand order. */
11794 if (ix86_swap_binary_operands_p (code, mode, operands))
11798 /* It is invalid to swap operands of different modes. */
11799 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
11806 /* Both source operands cannot be in memory. */
11807 if (MEM_P (src1) && MEM_P (src2))
11809 /* Optimization: Only read from memory once. */
11810 if (rtx_equal_p (src1, src2))
11812 src2 = force_reg (mode, src2);
11816 src2 = force_reg (mode, src2);
11819 /* If the destination is memory, and we do not have matching source
11820 operands, do things in registers. */
11821 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
11822 dst = gen_reg_rtx (mode);
11824 /* Source 1 cannot be a constant. */
11825 if (CONSTANT_P (src1))
11826 src1 = force_reg (mode, src1);
11828 /* Source 1 cannot be a non-matching memory. */
11829 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
11830 src1 = force_reg (mode, src1);
11832 operands[1] = src1;
11833 operands[2] = src2;
11837 /* Similarly, but assume that the destination has already been
11838 set up properly. */
11841 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
11842 enum machine_mode mode, rtx operands[])
11844 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
11845 gcc_assert (dst == operands[0]);
11848 /* Attempt to expand a binary operator. Make the expansion closer to the
11849 actual machine, then just general_operand, which will allow 3 separate
11850 memory references (one output, two input) in a single insn. */
11853 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
11856 rtx src1, src2, dst, op, clob;
11858 dst = ix86_fixup_binary_operands (code, mode, operands);
11859 src1 = operands[1];
11860 src2 = operands[2];
11862 /* Emit the instruction. */
11864 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
11865 if (reload_in_progress)
11867 /* Reload doesn't know about the flags register, and doesn't know that
11868 it doesn't want to clobber it. We can only do this with PLUS. */
11869 gcc_assert (code == PLUS);
11874 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11875 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11878 /* Fix up the destination if needed. */
11879 if (dst != operands[0])
11880 emit_move_insn (operands[0], dst);
11883 /* Return TRUE or FALSE depending on whether the binary operator meets the
11884 appropriate constraints. */
11887 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
11890 rtx dst = operands[0];
11891 rtx src1 = operands[1];
11892 rtx src2 = operands[2];
11894 /* Both source operands cannot be in memory. */
11895 if (MEM_P (src1) && MEM_P (src2))
11898 /* Canonicalize operand order for commutative operators. */
11899 if (ix86_swap_binary_operands_p (code, mode, operands))
11906 /* If the destination is memory, we must have a matching source operand. */
11907 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
11910 /* Source 1 cannot be a constant. */
11911 if (CONSTANT_P (src1))
11914 /* Source 1 cannot be a non-matching memory. */
11915 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
11921 /* Attempt to expand a unary operator. Make the expansion closer to the
11922 actual machine, then just general_operand, which will allow 2 separate
11923 memory references (one output, one input) in a single insn. */
11926 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
11929 int matching_memory;
11930 rtx src, dst, op, clob;
11935 /* If the destination is memory, and we do not have matching source
11936 operands, do things in registers. */
11937 matching_memory = 0;
11940 if (rtx_equal_p (dst, src))
11941 matching_memory = 1;
11943 dst = gen_reg_rtx (mode);
11946 /* When source operand is memory, destination must match. */
11947 if (MEM_P (src) && !matching_memory)
11948 src = force_reg (mode, src);
11950 /* Emit the instruction. */
11952 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11953 if (reload_in_progress || code == NOT)
11955 /* Reload doesn't know about the flags register, and doesn't know that
11956 it doesn't want to clobber it. */
11957 gcc_assert (code == NOT);
11962 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11963 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11966 /* Fix up the destination if needed. */
11967 if (dst != operands[0])
11968 emit_move_insn (operands[0], dst);
11971 /* Return TRUE or FALSE depending on whether the unary operator meets the
11972 appropriate constraints. */
11975 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11976 enum machine_mode mode ATTRIBUTE_UNUSED,
11977 rtx operands[2] ATTRIBUTE_UNUSED)
11979 /* If one of operands is memory, source and destination must match. */
11980 if ((MEM_P (operands[0])
11981 || MEM_P (operands[1]))
11982 && ! rtx_equal_p (operands[0], operands[1]))
11987 /* Post-reload splitter for converting an SF or DFmode value in an
11988 SSE register into an unsigned SImode. */
11991 ix86_split_convert_uns_si_sse (rtx operands[])
11993 enum machine_mode vecmode;
11994 rtx value, large, zero_or_two31, input, two31, x;
11996 large = operands[1];
11997 zero_or_two31 = operands[2];
11998 input = operands[3];
11999 two31 = operands[4];
12000 vecmode = GET_MODE (large);
12001 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12003 /* Load up the value into the low element. We must ensure that the other
12004 elements are valid floats -- zero is the easiest such value. */
12007 if (vecmode == V4SFmode)
12008 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12010 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12014 input = gen_rtx_REG (vecmode, REGNO (input));
12015 emit_move_insn (value, CONST0_RTX (vecmode));
12016 if (vecmode == V4SFmode)
12017 emit_insn (gen_sse_movss (value, value, input));
12019 emit_insn (gen_sse2_movsd (value, value, input));
12022 emit_move_insn (large, two31);
12023 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12025 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12026 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12028 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12029 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12031 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12032 emit_insn (gen_rtx_SET (VOIDmode, value, x));
12034 large = gen_rtx_REG (V4SImode, REGNO (large));
12035 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12037 x = gen_rtx_REG (V4SImode, REGNO (value));
12038 if (vecmode == V4SFmode)
12039 emit_insn (gen_sse2_cvttps2dq (x, value));
12041 emit_insn (gen_sse2_cvttpd2dq (x, value));
12044 emit_insn (gen_xorv4si3 (value, value, large));
12047 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12048 Expects the 64-bit DImode to be supplied in a pair of integral
12049 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12050 -mfpmath=sse, !optimize_size only. */
12053 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12055 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12056 rtx int_xmm, fp_xmm;
12057 rtx biases, exponents;
12060 int_xmm = gen_reg_rtx (V4SImode);
12061 if (TARGET_INTER_UNIT_MOVES)
12062 emit_insn (gen_movdi_to_sse (int_xmm, input));
12063 else if (TARGET_SSE_SPLIT_REGS)
12065 emit_clobber (int_xmm);
12066 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12070 x = gen_reg_rtx (V2DImode);
12071 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12072 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12075 x = gen_rtx_CONST_VECTOR (V4SImode,
12076 gen_rtvec (4, GEN_INT (0x43300000UL),
12077 GEN_INT (0x45300000UL),
12078 const0_rtx, const0_rtx));
12079 exponents = validize_mem (force_const_mem (V4SImode, x));
12081 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12082 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12084 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12085 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12086 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12087 (0x1.0p84 + double(fp_value_hi_xmm)).
12088 Note these exponents differ by 32. */
12090 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12092 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12093 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12094 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12095 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12096 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12097 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12098 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12099 biases = validize_mem (force_const_mem (V2DFmode, biases));
12100 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12102 /* Add the upper and lower DFmode values together. */
12104 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12107 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12108 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12109 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12112 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12115 /* Not used, but eases macroization of patterns. */
12117 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12118 rtx input ATTRIBUTE_UNUSED)
12120 gcc_unreachable ();
12123 /* Convert an unsigned SImode value into a DFmode. Only currently used
12124 for SSE, but applicable anywhere. */
12127 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12129 REAL_VALUE_TYPE TWO31r;
12132 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12133 NULL, 1, OPTAB_DIRECT);
12135 fp = gen_reg_rtx (DFmode);
12136 emit_insn (gen_floatsidf2 (fp, x));
12138 real_ldexp (&TWO31r, &dconst1, 31);
12139 x = const_double_from_real_value (TWO31r, DFmode);
12141 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12143 emit_move_insn (target, x);
12146 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12147 32-bit mode; otherwise we have a direct convert instruction. */
12150 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12152 REAL_VALUE_TYPE TWO32r;
12153 rtx fp_lo, fp_hi, x;
12155 fp_lo = gen_reg_rtx (DFmode);
12156 fp_hi = gen_reg_rtx (DFmode);
12158 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12160 real_ldexp (&TWO32r, &dconst1, 32);
12161 x = const_double_from_real_value (TWO32r, DFmode);
12162 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12164 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12166 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12169 emit_move_insn (target, x);
12172 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12173 For x86_32, -mfpmath=sse, !optimize_size only. */
12175 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12177 REAL_VALUE_TYPE ONE16r;
12178 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12180 real_ldexp (&ONE16r, &dconst1, 16);
12181 x = const_double_from_real_value (ONE16r, SFmode);
12182 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12183 NULL, 0, OPTAB_DIRECT);
12184 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12185 NULL, 0, OPTAB_DIRECT);
12186 fp_hi = gen_reg_rtx (SFmode);
12187 fp_lo = gen_reg_rtx (SFmode);
12188 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12189 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12190 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12192 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12194 if (!rtx_equal_p (target, fp_hi))
12195 emit_move_insn (target, fp_hi);
12198 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12199 then replicate the value for all elements of the vector
12203 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12210 v = gen_rtvec (4, value, value, value, value);
12211 return gen_rtx_CONST_VECTOR (V4SImode, v);
12215 v = gen_rtvec (2, value, value);
12216 return gen_rtx_CONST_VECTOR (V2DImode, v);
12220 v = gen_rtvec (4, value, value, value, value);
12222 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12223 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12224 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12228 v = gen_rtvec (2, value, value);
12230 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12231 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12234 gcc_unreachable ();
12238 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12239 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12240 for an SSE register. If VECT is true, then replicate the mask for
12241 all elements of the vector register. If INVERT is true, then create
12242 a mask excluding the sign bit. */
12245 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12247 enum machine_mode vec_mode, imode;
12248 HOST_WIDE_INT hi, lo;
12253 /* Find the sign bit, sign extended to 2*HWI. */
12259 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12260 lo = 0x80000000, hi = lo < 0;
12266 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12267 if (HOST_BITS_PER_WIDE_INT >= 64)
12268 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12270 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12275 vec_mode = VOIDmode;
12276 if (HOST_BITS_PER_WIDE_INT >= 64)
12279 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12286 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12290 lo = ~lo, hi = ~hi;
12296 mask = immed_double_const (lo, hi, imode);
12298 vec = gen_rtvec (2, v, mask);
12299 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12300 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12307 gcc_unreachable ();
12311 lo = ~lo, hi = ~hi;
12313 /* Force this value into the low part of a fp vector constant. */
12314 mask = immed_double_const (lo, hi, imode);
12315 mask = gen_lowpart (mode, mask);
12317 if (vec_mode == VOIDmode)
12318 return force_reg (mode, mask);
12320 v = ix86_build_const_vector (mode, vect, mask);
12321 return force_reg (vec_mode, v);
12324 /* Generate code for floating point ABS or NEG. */
12327 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12330 rtx mask, set, use, clob, dst, src;
12331 bool use_sse = false;
12332 bool vector_mode = VECTOR_MODE_P (mode);
12333 enum machine_mode elt_mode = mode;
12337 elt_mode = GET_MODE_INNER (mode);
12340 else if (mode == TFmode)
12342 else if (TARGET_SSE_MATH)
12343 use_sse = SSE_FLOAT_MODE_P (mode);
12345 /* NEG and ABS performed with SSE use bitwise mask operations.
12346 Create the appropriate mask now. */
12348 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12357 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12358 set = gen_rtx_SET (VOIDmode, dst, set);
12363 set = gen_rtx_fmt_e (code, mode, src);
12364 set = gen_rtx_SET (VOIDmode, dst, set);
12367 use = gen_rtx_USE (VOIDmode, mask);
12368 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12369 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12370 gen_rtvec (3, set, use, clob)));
12377 /* Expand a copysign operation. Special case operand 0 being a constant. */
12380 ix86_expand_copysign (rtx operands[])
12382 enum machine_mode mode;
12383 rtx dest, op0, op1, mask, nmask;
12385 dest = operands[0];
12389 mode = GET_MODE (dest);
12391 if (GET_CODE (op0) == CONST_DOUBLE)
12393 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12395 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12396 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12398 if (mode == SFmode || mode == DFmode)
12400 enum machine_mode vmode;
12402 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12404 if (op0 == CONST0_RTX (mode))
12405 op0 = CONST0_RTX (vmode);
12410 if (mode == SFmode)
12411 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12412 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12414 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12416 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12419 else if (op0 != CONST0_RTX (mode))
12420 op0 = force_reg (mode, op0);
12422 mask = ix86_build_signbit_mask (mode, 0, 0);
12424 if (mode == SFmode)
12425 copysign_insn = gen_copysignsf3_const;
12426 else if (mode == DFmode)
12427 copysign_insn = gen_copysigndf3_const;
12429 copysign_insn = gen_copysigntf3_const;
12431 emit_insn (copysign_insn (dest, op0, op1, mask));
12435 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
12437 nmask = ix86_build_signbit_mask (mode, 0, 1);
12438 mask = ix86_build_signbit_mask (mode, 0, 0);
12440 if (mode == SFmode)
12441 copysign_insn = gen_copysignsf3_var;
12442 else if (mode == DFmode)
12443 copysign_insn = gen_copysigndf3_var;
12445 copysign_insn = gen_copysigntf3_var;
12447 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
12451 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
12452 be a constant, and so has already been expanded into a vector constant. */
12455 ix86_split_copysign_const (rtx operands[])
12457 enum machine_mode mode, vmode;
12458 rtx dest, op0, op1, mask, x;
12460 dest = operands[0];
12463 mask = operands[3];
12465 mode = GET_MODE (dest);
12466 vmode = GET_MODE (mask);
12468 dest = simplify_gen_subreg (vmode, dest, mode, 0);
12469 x = gen_rtx_AND (vmode, dest, mask);
12470 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12472 if (op0 != CONST0_RTX (vmode))
12474 x = gen_rtx_IOR (vmode, dest, op0);
12475 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12479 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
12480 so we have to do two masks. */
12483 ix86_split_copysign_var (rtx operands[])
12485 enum machine_mode mode, vmode;
12486 rtx dest, scratch, op0, op1, mask, nmask, x;
12488 dest = operands[0];
12489 scratch = operands[1];
12492 nmask = operands[4];
12493 mask = operands[5];
12495 mode = GET_MODE (dest);
12496 vmode = GET_MODE (mask);
12498 if (rtx_equal_p (op0, op1))
12500 /* Shouldn't happen often (it's useless, obviously), but when it does
12501 we'd generate incorrect code if we continue below. */
12502 emit_move_insn (dest, op0);
12506 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
12508 gcc_assert (REGNO (op1) == REGNO (scratch));
12510 x = gen_rtx_AND (vmode, scratch, mask);
12511 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12514 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12515 x = gen_rtx_NOT (vmode, dest);
12516 x = gen_rtx_AND (vmode, x, op0);
12517 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12521 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
12523 x = gen_rtx_AND (vmode, scratch, mask);
12525 else /* alternative 2,4 */
12527 gcc_assert (REGNO (mask) == REGNO (scratch));
12528 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
12529 x = gen_rtx_AND (vmode, scratch, op1);
12531 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
12533 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
12535 dest = simplify_gen_subreg (vmode, op0, mode, 0);
12536 x = gen_rtx_AND (vmode, dest, nmask);
12538 else /* alternative 3,4 */
12540 gcc_assert (REGNO (nmask) == REGNO (dest));
12542 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
12543 x = gen_rtx_AND (vmode, dest, op0);
12545 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12548 x = gen_rtx_IOR (vmode, dest, scratch);
12549 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12552 /* Return TRUE or FALSE depending on whether the first SET in INSN
12553 has source and destination with matching CC modes, and that the
12554 CC mode is at least as constrained as REQ_MODE. */
12557 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
12560 enum machine_mode set_mode;
12562 set = PATTERN (insn);
12563 if (GET_CODE (set) == PARALLEL)
12564 set = XVECEXP (set, 0, 0);
12565 gcc_assert (GET_CODE (set) == SET);
12566 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
12568 set_mode = GET_MODE (SET_DEST (set));
12572 if (req_mode != CCNOmode
12573 && (req_mode != CCmode
12574 || XEXP (SET_SRC (set), 1) != const0_rtx))
12578 if (req_mode == CCGCmode)
12582 if (req_mode == CCGOCmode || req_mode == CCNOmode)
12586 if (req_mode == CCZmode)
12593 gcc_unreachable ();
12596 return (GET_MODE (SET_SRC (set)) == set_mode);
12599 /* Generate insn patterns to do an integer compare of OPERANDS. */
12602 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
12604 enum machine_mode cmpmode;
12607 cmpmode = SELECT_CC_MODE (code, op0, op1);
12608 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
12610 /* This is very simple, but making the interface the same as in the
12611 FP case makes the rest of the code easier. */
12612 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
12613 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
12615 /* Return the test that should be put into the flags user, i.e.
12616 the bcc, scc, or cmov instruction. */
12617 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
12620 /* Figure out whether to use ordered or unordered fp comparisons.
12621 Return the appropriate mode to use. */
12624 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
12626 /* ??? In order to make all comparisons reversible, we do all comparisons
12627 non-trapping when compiling for IEEE. Once gcc is able to distinguish
12628 all forms trapping and nontrapping comparisons, we can make inequality
12629 comparisons trapping again, since it results in better code when using
12630 FCOM based compares. */
12631 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
12635 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
12637 enum machine_mode mode = GET_MODE (op0);
12639 if (SCALAR_FLOAT_MODE_P (mode))
12641 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12642 return ix86_fp_compare_mode (code);
12647 /* Only zero flag is needed. */
12648 case EQ: /* ZF=0 */
12649 case NE: /* ZF!=0 */
12651 /* Codes needing carry flag. */
12652 case GEU: /* CF=0 */
12653 case LTU: /* CF=1 */
12654 /* Detect overflow checks. They need just the carry flag. */
12655 if (GET_CODE (op0) == PLUS
12656 && rtx_equal_p (op1, XEXP (op0, 0)))
12660 case GTU: /* CF=0 & ZF=0 */
12661 case LEU: /* CF=1 | ZF=1 */
12662 /* Detect overflow checks. They need just the carry flag. */
12663 if (GET_CODE (op0) == MINUS
12664 && rtx_equal_p (op1, XEXP (op0, 0)))
12668 /* Codes possibly doable only with sign flag when
12669 comparing against zero. */
12670 case GE: /* SF=OF or SF=0 */
12671 case LT: /* SF<>OF or SF=1 */
12672 if (op1 == const0_rtx)
12675 /* For other cases Carry flag is not required. */
12677 /* Codes doable only with sign flag when comparing
12678 against zero, but we miss jump instruction for it
12679 so we need to use relational tests against overflow
12680 that thus needs to be zero. */
12681 case GT: /* ZF=0 & SF=OF */
12682 case LE: /* ZF=1 | SF<>OF */
12683 if (op1 == const0_rtx)
12687 /* strcmp pattern do (use flags) and combine may ask us for proper
12692 gcc_unreachable ();
12696 /* Return the fixed registers used for condition codes. */
12699 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12706 /* If two condition code modes are compatible, return a condition code
12707 mode which is compatible with both. Otherwise, return
12710 static enum machine_mode
12711 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
12716 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
12719 if ((m1 == CCGCmode && m2 == CCGOCmode)
12720 || (m1 == CCGOCmode && m2 == CCGCmode))
12726 gcc_unreachable ();
12756 /* These are only compatible with themselves, which we already
12762 /* Split comparison code CODE into comparisons we can do using branch
12763 instructions. BYPASS_CODE is comparison code for branch that will
12764 branch around FIRST_CODE and SECOND_CODE. If some of branches
12765 is not required, set value to UNKNOWN.
12766 We never require more than two branches. */
12769 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
12770 enum rtx_code *first_code,
12771 enum rtx_code *second_code)
12773 *first_code = code;
12774 *bypass_code = UNKNOWN;
12775 *second_code = UNKNOWN;
12777 /* The fcomi comparison sets flags as follows:
12787 case GT: /* GTU - CF=0 & ZF=0 */
12788 case GE: /* GEU - CF=0 */
12789 case ORDERED: /* PF=0 */
12790 case UNORDERED: /* PF=1 */
12791 case UNEQ: /* EQ - ZF=1 */
12792 case UNLT: /* LTU - CF=1 */
12793 case UNLE: /* LEU - CF=1 | ZF=1 */
12794 case LTGT: /* EQ - ZF=0 */
12796 case LT: /* LTU - CF=1 - fails on unordered */
12797 *first_code = UNLT;
12798 *bypass_code = UNORDERED;
12800 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
12801 *first_code = UNLE;
12802 *bypass_code = UNORDERED;
12804 case EQ: /* EQ - ZF=1 - fails on unordered */
12805 *first_code = UNEQ;
12806 *bypass_code = UNORDERED;
12808 case NE: /* NE - ZF=0 - fails on unordered */
12809 *first_code = LTGT;
12810 *second_code = UNORDERED;
12812 case UNGE: /* GEU - CF=0 - fails on unordered */
12814 *second_code = UNORDERED;
12816 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
12818 *second_code = UNORDERED;
12821 gcc_unreachable ();
12823 if (!TARGET_IEEE_FP)
12825 *second_code = UNKNOWN;
12826 *bypass_code = UNKNOWN;
12830 /* Return cost of comparison done fcom + arithmetics operations on AX.
12831 All following functions do use number of instructions as a cost metrics.
12832 In future this should be tweaked to compute bytes for optimize_size and
12833 take into account performance of various instructions on various CPUs. */
12835 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
12837 if (!TARGET_IEEE_FP)
12839 /* The cost of code output by ix86_expand_fp_compare. */
12863 gcc_unreachable ();
12867 /* Return cost of comparison done using fcomi operation.
12868 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12870 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
12872 enum rtx_code bypass_code, first_code, second_code;
12873 /* Return arbitrarily high cost when instruction is not supported - this
12874 prevents gcc from using it. */
12877 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12878 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
12881 /* Return cost of comparison done using sahf operation.
12882 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12884 ix86_fp_comparison_sahf_cost (enum rtx_code code)
12886 enum rtx_code bypass_code, first_code, second_code;
12887 /* Return arbitrarily high cost when instruction is not preferred - this
12888 avoids gcc from using it. */
12889 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
12891 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12892 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
12895 /* Compute cost of the comparison done using any method.
12896 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12898 ix86_fp_comparison_cost (enum rtx_code code)
12900 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
12903 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
12904 sahf_cost = ix86_fp_comparison_sahf_cost (code);
12906 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
12907 if (min > sahf_cost)
12909 if (min > fcomi_cost)
12914 /* Return true if we should use an FCOMI instruction for this
12918 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
12920 enum rtx_code swapped_code = swap_condition (code);
12922 return ((ix86_fp_comparison_cost (code)
12923 == ix86_fp_comparison_fcomi_cost (code))
12924 || (ix86_fp_comparison_cost (swapped_code)
12925 == ix86_fp_comparison_fcomi_cost (swapped_code)));
12928 /* Swap, force into registers, or otherwise massage the two operands
12929 to a fp comparison. The operands are updated in place; the new
12930 comparison code is returned. */
12932 static enum rtx_code
12933 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
12935 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
12936 rtx op0 = *pop0, op1 = *pop1;
12937 enum machine_mode op_mode = GET_MODE (op0);
12938 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
12940 /* All of the unordered compare instructions only work on registers.
12941 The same is true of the fcomi compare instructions. The XFmode
12942 compare instructions require registers except when comparing
12943 against zero or when converting operand 1 from fixed point to
12947 && (fpcmp_mode == CCFPUmode
12948 || (op_mode == XFmode
12949 && ! (standard_80387_constant_p (op0) == 1
12950 || standard_80387_constant_p (op1) == 1)
12951 && GET_CODE (op1) != FLOAT)
12952 || ix86_use_fcomi_compare (code)))
12954 op0 = force_reg (op_mode, op0);
12955 op1 = force_reg (op_mode, op1);
12959 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
12960 things around if they appear profitable, otherwise force op0
12961 into a register. */
12963 if (standard_80387_constant_p (op0) == 0
12965 && ! (standard_80387_constant_p (op1) == 0
12969 tmp = op0, op0 = op1, op1 = tmp;
12970 code = swap_condition (code);
12974 op0 = force_reg (op_mode, op0);
12976 if (CONSTANT_P (op1))
12978 int tmp = standard_80387_constant_p (op1);
12980 op1 = validize_mem (force_const_mem (op_mode, op1));
12984 op1 = force_reg (op_mode, op1);
12987 op1 = force_reg (op_mode, op1);
12991 /* Try to rearrange the comparison to make it cheaper. */
12992 if (ix86_fp_comparison_cost (code)
12993 > ix86_fp_comparison_cost (swap_condition (code))
12994 && (REG_P (op1) || can_create_pseudo_p ()))
12997 tmp = op0, op0 = op1, op1 = tmp;
12998 code = swap_condition (code);
13000 op0 = force_reg (op_mode, op0);
13008 /* Convert comparison codes we use to represent FP comparison to integer
13009 code that will result in proper branch. Return UNKNOWN if no such code
13013 ix86_fp_compare_code_to_integer (enum rtx_code code)
13042 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13045 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13046 rtx *second_test, rtx *bypass_test)
13048 enum machine_mode fpcmp_mode, intcmp_mode;
13050 int cost = ix86_fp_comparison_cost (code);
13051 enum rtx_code bypass_code, first_code, second_code;
13053 fpcmp_mode = ix86_fp_compare_mode (code);
13054 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13057 *second_test = NULL_RTX;
13059 *bypass_test = NULL_RTX;
13061 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13063 /* Do fcomi/sahf based test when profitable. */
13064 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13065 && (bypass_code == UNKNOWN || bypass_test)
13066 && (second_code == UNKNOWN || second_test))
13068 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13069 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13075 gcc_assert (TARGET_SAHF);
13078 scratch = gen_reg_rtx (HImode);
13079 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13081 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13084 /* The FP codes work out to act like unsigned. */
13085 intcmp_mode = fpcmp_mode;
13087 if (bypass_code != UNKNOWN)
13088 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13089 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13091 if (second_code != UNKNOWN)
13092 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13093 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13098 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13099 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13100 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13102 scratch = gen_reg_rtx (HImode);
13103 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13105 /* In the unordered case, we have to check C2 for NaN's, which
13106 doesn't happen to work out to anything nice combination-wise.
13107 So do some bit twiddling on the value we've got in AH to come
13108 up with an appropriate set of condition codes. */
13110 intcmp_mode = CCNOmode;
13115 if (code == GT || !TARGET_IEEE_FP)
13117 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13122 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13123 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13124 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13125 intcmp_mode = CCmode;
13131 if (code == LT && TARGET_IEEE_FP)
13133 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13134 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13135 intcmp_mode = CCmode;
13140 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13146 if (code == GE || !TARGET_IEEE_FP)
13148 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13153 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13154 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13161 if (code == LE && TARGET_IEEE_FP)
13163 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13164 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13165 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13166 intcmp_mode = CCmode;
13171 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13177 if (code == EQ && TARGET_IEEE_FP)
13179 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13180 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13181 intcmp_mode = CCmode;
13186 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13193 if (code == NE && TARGET_IEEE_FP)
13195 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13196 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13202 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13208 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13212 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13217 gcc_unreachable ();
13221 /* Return the test that should be put into the flags user, i.e.
13222 the bcc, scc, or cmov instruction. */
13223 return gen_rtx_fmt_ee (code, VOIDmode,
13224 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13229 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13232 op0 = ix86_compare_op0;
13233 op1 = ix86_compare_op1;
13236 *second_test = NULL_RTX;
13238 *bypass_test = NULL_RTX;
13240 if (ix86_compare_emitted)
13242 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13243 ix86_compare_emitted = NULL_RTX;
13245 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13247 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13248 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13249 second_test, bypass_test);
13252 ret = ix86_expand_int_compare (code, op0, op1);
13257 /* Return true if the CODE will result in nontrivial jump sequence. */
13259 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13261 enum rtx_code bypass_code, first_code, second_code;
13264 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13265 return bypass_code != UNKNOWN || second_code != UNKNOWN;
13269 ix86_expand_branch (enum rtx_code code, rtx label)
13273 /* If we have emitted a compare insn, go straight to simple.
13274 ix86_expand_compare won't emit anything if ix86_compare_emitted
13276 if (ix86_compare_emitted)
13279 switch (GET_MODE (ix86_compare_op0))
13285 tmp = ix86_expand_compare (code, NULL, NULL);
13286 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13287 gen_rtx_LABEL_REF (VOIDmode, label),
13289 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
13298 enum rtx_code bypass_code, first_code, second_code;
13300 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13301 &ix86_compare_op1);
13303 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13305 /* Check whether we will use the natural sequence with one jump. If
13306 so, we can expand jump early. Otherwise delay expansion by
13307 creating compound insn to not confuse optimizers. */
13308 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13310 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13311 gen_rtx_LABEL_REF (VOIDmode, label),
13312 pc_rtx, NULL_RTX, NULL_RTX);
13316 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13317 ix86_compare_op0, ix86_compare_op1);
13318 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13319 gen_rtx_LABEL_REF (VOIDmode, label),
13321 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13323 use_fcomi = ix86_use_fcomi_compare (code);
13324 vec = rtvec_alloc (3 + !use_fcomi);
13325 RTVEC_ELT (vec, 0) = tmp;
13327 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13329 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13332 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13334 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13343 /* Expand DImode branch into multiple compare+branch. */
13345 rtx lo[2], hi[2], label2;
13346 enum rtx_code code1, code2, code3;
13347 enum machine_mode submode;
13349 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13351 tmp = ix86_compare_op0;
13352 ix86_compare_op0 = ix86_compare_op1;
13353 ix86_compare_op1 = tmp;
13354 code = swap_condition (code);
13356 if (GET_MODE (ix86_compare_op0) == DImode)
13358 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13359 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13364 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13365 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13369 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13370 avoid two branches. This costs one extra insn, so disable when
13371 optimizing for size. */
13373 if ((code == EQ || code == NE)
13375 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13380 if (hi[1] != const0_rtx)
13381 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13382 NULL_RTX, 0, OPTAB_WIDEN);
13385 if (lo[1] != const0_rtx)
13386 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13387 NULL_RTX, 0, OPTAB_WIDEN);
13389 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13390 NULL_RTX, 0, OPTAB_WIDEN);
13392 ix86_compare_op0 = tmp;
13393 ix86_compare_op1 = const0_rtx;
13394 ix86_expand_branch (code, label);
13398 /* Otherwise, if we are doing less-than or greater-or-equal-than,
13399 op1 is a constant and the low word is zero, then we can just
13400 examine the high word. Similarly for low word -1 and
13401 less-or-equal-than or greater-than. */
13403 if (CONST_INT_P (hi[1]))
13406 case LT: case LTU: case GE: case GEU:
13407 if (lo[1] == const0_rtx)
13409 ix86_compare_op0 = hi[0];
13410 ix86_compare_op1 = hi[1];
13411 ix86_expand_branch (code, label);
13415 case LE: case LEU: case GT: case GTU:
13416 if (lo[1] == constm1_rtx)
13418 ix86_compare_op0 = hi[0];
13419 ix86_compare_op1 = hi[1];
13420 ix86_expand_branch (code, label);
13428 /* Otherwise, we need two or three jumps. */
13430 label2 = gen_label_rtx ();
13433 code2 = swap_condition (code);
13434 code3 = unsigned_condition (code);
13438 case LT: case GT: case LTU: case GTU:
13441 case LE: code1 = LT; code2 = GT; break;
13442 case GE: code1 = GT; code2 = LT; break;
13443 case LEU: code1 = LTU; code2 = GTU; break;
13444 case GEU: code1 = GTU; code2 = LTU; break;
13446 case EQ: code1 = UNKNOWN; code2 = NE; break;
13447 case NE: code2 = UNKNOWN; break;
13450 gcc_unreachable ();
13455 * if (hi(a) < hi(b)) goto true;
13456 * if (hi(a) > hi(b)) goto false;
13457 * if (lo(a) < lo(b)) goto true;
13461 ix86_compare_op0 = hi[0];
13462 ix86_compare_op1 = hi[1];
13464 if (code1 != UNKNOWN)
13465 ix86_expand_branch (code1, label);
13466 if (code2 != UNKNOWN)
13467 ix86_expand_branch (code2, label2);
13469 ix86_compare_op0 = lo[0];
13470 ix86_compare_op1 = lo[1];
13471 ix86_expand_branch (code3, label);
13473 if (code2 != UNKNOWN)
13474 emit_label (label2);
13479 gcc_unreachable ();
13483 /* Split branch based on floating point condition. */
13485 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
13486 rtx target1, rtx target2, rtx tmp, rtx pushed)
13488 rtx second, bypass;
13489 rtx label = NULL_RTX;
13491 int bypass_probability = -1, second_probability = -1, probability = -1;
13494 if (target2 != pc_rtx)
13497 code = reverse_condition_maybe_unordered (code);
13502 condition = ix86_expand_fp_compare (code, op1, op2,
13503 tmp, &second, &bypass);
13505 /* Remove pushed operand from stack. */
13507 ix86_free_from_memory (GET_MODE (pushed));
13509 if (split_branch_probability >= 0)
13511 /* Distribute the probabilities across the jumps.
13512 Assume the BYPASS and SECOND to be always test
13514 probability = split_branch_probability;
13516 /* Value of 1 is low enough to make no need for probability
13517 to be updated. Later we may run some experiments and see
13518 if unordered values are more frequent in practice. */
13520 bypass_probability = 1;
13522 second_probability = 1;
13524 if (bypass != NULL_RTX)
13526 label = gen_label_rtx ();
13527 i = emit_jump_insn (gen_rtx_SET
13529 gen_rtx_IF_THEN_ELSE (VOIDmode,
13531 gen_rtx_LABEL_REF (VOIDmode,
13534 if (bypass_probability >= 0)
13536 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13537 GEN_INT (bypass_probability),
13540 i = emit_jump_insn (gen_rtx_SET
13542 gen_rtx_IF_THEN_ELSE (VOIDmode,
13543 condition, target1, target2)));
13544 if (probability >= 0)
13546 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13547 GEN_INT (probability),
13549 if (second != NULL_RTX)
13551 i = emit_jump_insn (gen_rtx_SET
13553 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
13555 if (second_probability >= 0)
13557 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13558 GEN_INT (second_probability),
13561 if (label != NULL_RTX)
13562 emit_label (label);
13566 ix86_expand_setcc (enum rtx_code code, rtx dest)
13568 rtx ret, tmp, tmpreg, equiv;
13569 rtx second_test, bypass_test;
13571 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
13572 return 0; /* FAIL */
13574 gcc_assert (GET_MODE (dest) == QImode);
13576 ret = ix86_expand_compare (code, &second_test, &bypass_test);
13577 PUT_MODE (ret, QImode);
13582 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
13583 if (bypass_test || second_test)
13585 rtx test = second_test;
13587 rtx tmp2 = gen_reg_rtx (QImode);
13590 gcc_assert (!second_test);
13591 test = bypass_test;
13593 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
13595 PUT_MODE (test, QImode);
13596 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
13599 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
13601 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
13604 /* Attach a REG_EQUAL note describing the comparison result. */
13605 if (ix86_compare_op0 && ix86_compare_op1)
13607 equiv = simplify_gen_relational (code, QImode,
13608 GET_MODE (ix86_compare_op0),
13609 ix86_compare_op0, ix86_compare_op1);
13610 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
13613 return 1; /* DONE */
13616 /* Expand comparison setting or clearing carry flag. Return true when
13617 successful and set pop for the operation. */
13619 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
13621 enum machine_mode mode =
13622 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
13624 /* Do not handle DImode compares that go through special path. */
13625 if (mode == (TARGET_64BIT ? TImode : DImode))
13628 if (SCALAR_FLOAT_MODE_P (mode))
13630 rtx second_test = NULL, bypass_test = NULL;
13631 rtx compare_op, compare_seq;
13633 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13635 /* Shortcut: following common codes never translate
13636 into carry flag compares. */
13637 if (code == EQ || code == NE || code == UNEQ || code == LTGT
13638 || code == ORDERED || code == UNORDERED)
13641 /* These comparisons require zero flag; swap operands so they won't. */
13642 if ((code == GT || code == UNLE || code == LE || code == UNGT)
13643 && !TARGET_IEEE_FP)
13648 code = swap_condition (code);
13651 /* Try to expand the comparison and verify that we end up with
13652 carry flag based comparison. This fails to be true only when
13653 we decide to expand comparison using arithmetic that is not
13654 too common scenario. */
13656 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13657 &second_test, &bypass_test);
13658 compare_seq = get_insns ();
13661 if (second_test || bypass_test)
13664 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13665 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13666 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
13668 code = GET_CODE (compare_op);
13670 if (code != LTU && code != GEU)
13673 emit_insn (compare_seq);
13678 if (!INTEGRAL_MODE_P (mode))
13687 /* Convert a==0 into (unsigned)a<1. */
13690 if (op1 != const0_rtx)
13693 code = (code == EQ ? LTU : GEU);
13696 /* Convert a>b into b<a or a>=b-1. */
13699 if (CONST_INT_P (op1))
13701 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
13702 /* Bail out on overflow. We still can swap operands but that
13703 would force loading of the constant into register. */
13704 if (op1 == const0_rtx
13705 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
13707 code = (code == GTU ? GEU : LTU);
13714 code = (code == GTU ? LTU : GEU);
13718 /* Convert a>=0 into (unsigned)a<0x80000000. */
13721 if (mode == DImode || op1 != const0_rtx)
13723 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13724 code = (code == LT ? GEU : LTU);
13728 if (mode == DImode || op1 != constm1_rtx)
13730 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
13731 code = (code == LE ? GEU : LTU);
13737 /* Swapping operands may cause constant to appear as first operand. */
13738 if (!nonimmediate_operand (op0, VOIDmode))
13740 if (!can_create_pseudo_p ())
13742 op0 = force_reg (mode, op0);
13744 ix86_compare_op0 = op0;
13745 ix86_compare_op1 = op1;
13746 *pop = ix86_expand_compare (code, NULL, NULL);
13747 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
13752 ix86_expand_int_movcc (rtx operands[])
13754 enum rtx_code code = GET_CODE (operands[1]), compare_code;
13755 rtx compare_seq, compare_op;
13756 rtx second_test, bypass_test;
13757 enum machine_mode mode = GET_MODE (operands[0]);
13758 bool sign_bit_compare_p = false;;
13761 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13762 compare_seq = get_insns ();
13765 compare_code = GET_CODE (compare_op);
13767 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
13768 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
13769 sign_bit_compare_p = true;
13771 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
13772 HImode insns, we'd be swallowed in word prefix ops. */
13774 if ((mode != HImode || TARGET_FAST_PREFIX)
13775 && (mode != (TARGET_64BIT ? TImode : DImode))
13776 && CONST_INT_P (operands[2])
13777 && CONST_INT_P (operands[3]))
13779 rtx out = operands[0];
13780 HOST_WIDE_INT ct = INTVAL (operands[2]);
13781 HOST_WIDE_INT cf = INTVAL (operands[3]);
13782 HOST_WIDE_INT diff;
13785 /* Sign bit compares are better done using shifts than we do by using
13787 if (sign_bit_compare_p
13788 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13789 ix86_compare_op1, &compare_op))
13791 /* Detect overlap between destination and compare sources. */
13794 if (!sign_bit_compare_p)
13796 bool fpcmp = false;
13798 compare_code = GET_CODE (compare_op);
13800 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13801 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13804 compare_code = ix86_fp_compare_code_to_integer (compare_code);
13807 /* To simplify rest of code, restrict to the GEU case. */
13808 if (compare_code == LTU)
13810 HOST_WIDE_INT tmp = ct;
13813 compare_code = reverse_condition (compare_code);
13814 code = reverse_condition (code);
13819 PUT_CODE (compare_op,
13820 reverse_condition_maybe_unordered
13821 (GET_CODE (compare_op)));
13823 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13827 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
13828 || reg_overlap_mentioned_p (out, ix86_compare_op1))
13829 tmp = gen_reg_rtx (mode);
13831 if (mode == DImode)
13832 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
13834 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
13838 if (code == GT || code == GE)
13839 code = reverse_condition (code);
13842 HOST_WIDE_INT tmp = ct;
13847 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
13848 ix86_compare_op1, VOIDmode, 0, -1);
13861 tmp = expand_simple_binop (mode, PLUS,
13863 copy_rtx (tmp), 1, OPTAB_DIRECT);
13874 tmp = expand_simple_binop (mode, IOR,
13876 copy_rtx (tmp), 1, OPTAB_DIRECT);
13878 else if (diff == -1 && ct)
13888 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13890 tmp = expand_simple_binop (mode, PLUS,
13891 copy_rtx (tmp), GEN_INT (cf),
13892 copy_rtx (tmp), 1, OPTAB_DIRECT);
13900 * andl cf - ct, dest
13910 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13913 tmp = expand_simple_binop (mode, AND,
13915 gen_int_mode (cf - ct, mode),
13916 copy_rtx (tmp), 1, OPTAB_DIRECT);
13918 tmp = expand_simple_binop (mode, PLUS,
13919 copy_rtx (tmp), GEN_INT (ct),
13920 copy_rtx (tmp), 1, OPTAB_DIRECT);
13923 if (!rtx_equal_p (tmp, out))
13924 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
13926 return 1; /* DONE */
13931 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13934 tmp = ct, ct = cf, cf = tmp;
13937 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13939 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13941 /* We may be reversing unordered compare to normal compare, that
13942 is not valid in general (we may convert non-trapping condition
13943 to trapping one), however on i386 we currently emit all
13944 comparisons unordered. */
13945 compare_code = reverse_condition_maybe_unordered (compare_code);
13946 code = reverse_condition_maybe_unordered (code);
13950 compare_code = reverse_condition (compare_code);
13951 code = reverse_condition (code);
13955 compare_code = UNKNOWN;
13956 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
13957 && CONST_INT_P (ix86_compare_op1))
13959 if (ix86_compare_op1 == const0_rtx
13960 && (code == LT || code == GE))
13961 compare_code = code;
13962 else if (ix86_compare_op1 == constm1_rtx)
13966 else if (code == GT)
13971 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13972 if (compare_code != UNKNOWN
13973 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13974 && (cf == -1 || ct == -1))
13976 /* If lea code below could be used, only optimize
13977 if it results in a 2 insn sequence. */
13979 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13980 || diff == 3 || diff == 5 || diff == 9)
13981 || (compare_code == LT && ct == -1)
13982 || (compare_code == GE && cf == -1))
13985 * notl op1 (if necessary)
13993 code = reverse_condition (code);
13996 out = emit_store_flag (out, code, ix86_compare_op0,
13997 ix86_compare_op1, VOIDmode, 0, -1);
13999 out = expand_simple_binop (mode, IOR,
14001 out, 1, OPTAB_DIRECT);
14002 if (out != operands[0])
14003 emit_move_insn (operands[0], out);
14005 return 1; /* DONE */
14010 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14011 || diff == 3 || diff == 5 || diff == 9)
14012 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14014 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14020 * lea cf(dest*(ct-cf)),dest
14024 * This also catches the degenerate setcc-only case.
14030 out = emit_store_flag (out, code, ix86_compare_op0,
14031 ix86_compare_op1, VOIDmode, 0, 1);
14034 /* On x86_64 the lea instruction operates on Pmode, so we need
14035 to get arithmetics done in proper mode to match. */
14037 tmp = copy_rtx (out);
14041 out1 = copy_rtx (out);
14042 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14046 tmp = gen_rtx_PLUS (mode, tmp, out1);
14052 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14055 if (!rtx_equal_p (tmp, out))
14058 out = force_operand (tmp, copy_rtx (out));
14060 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14062 if (!rtx_equal_p (out, operands[0]))
14063 emit_move_insn (operands[0], copy_rtx (out));
14065 return 1; /* DONE */
14069 * General case: Jumpful:
14070 * xorl dest,dest cmpl op1, op2
14071 * cmpl op1, op2 movl ct, dest
14072 * setcc dest jcc 1f
14073 * decl dest movl cf, dest
14074 * andl (cf-ct),dest 1:
14077 * Size 20. Size 14.
14079 * This is reasonably steep, but branch mispredict costs are
14080 * high on modern cpus, so consider failing only if optimizing
14084 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14085 && BRANCH_COST >= 2)
14089 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14094 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14096 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14098 /* We may be reversing unordered compare to normal compare,
14099 that is not valid in general (we may convert non-trapping
14100 condition to trapping one), however on i386 we currently
14101 emit all comparisons unordered. */
14102 code = reverse_condition_maybe_unordered (code);
14106 code = reverse_condition (code);
14107 if (compare_code != UNKNOWN)
14108 compare_code = reverse_condition (compare_code);
14112 if (compare_code != UNKNOWN)
14114 /* notl op1 (if needed)
14119 For x < 0 (resp. x <= -1) there will be no notl,
14120 so if possible swap the constants to get rid of the
14122 True/false will be -1/0 while code below (store flag
14123 followed by decrement) is 0/-1, so the constants need
14124 to be exchanged once more. */
14126 if (compare_code == GE || !cf)
14128 code = reverse_condition (code);
14133 HOST_WIDE_INT tmp = cf;
14138 out = emit_store_flag (out, code, ix86_compare_op0,
14139 ix86_compare_op1, VOIDmode, 0, -1);
14143 out = emit_store_flag (out, code, ix86_compare_op0,
14144 ix86_compare_op1, VOIDmode, 0, 1);
14146 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14147 copy_rtx (out), 1, OPTAB_DIRECT);
14150 out = expand_simple_binop (mode, AND, copy_rtx (out),
14151 gen_int_mode (cf - ct, mode),
14152 copy_rtx (out), 1, OPTAB_DIRECT);
14154 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14155 copy_rtx (out), 1, OPTAB_DIRECT);
14156 if (!rtx_equal_p (out, operands[0]))
14157 emit_move_insn (operands[0], copy_rtx (out));
14159 return 1; /* DONE */
14163 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14165 /* Try a few things more with specific constants and a variable. */
14168 rtx var, orig_out, out, tmp;
14170 if (BRANCH_COST <= 2)
14171 return 0; /* FAIL */
14173 /* If one of the two operands is an interesting constant, load a
14174 constant with the above and mask it in with a logical operation. */
14176 if (CONST_INT_P (operands[2]))
14179 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14180 operands[3] = constm1_rtx, op = and_optab;
14181 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14182 operands[3] = const0_rtx, op = ior_optab;
14184 return 0; /* FAIL */
14186 else if (CONST_INT_P (operands[3]))
14189 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14190 operands[2] = constm1_rtx, op = and_optab;
14191 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14192 operands[2] = const0_rtx, op = ior_optab;
14194 return 0; /* FAIL */
14197 return 0; /* FAIL */
14199 orig_out = operands[0];
14200 tmp = gen_reg_rtx (mode);
14203 /* Recurse to get the constant loaded. */
14204 if (ix86_expand_int_movcc (operands) == 0)
14205 return 0; /* FAIL */
14207 /* Mask in the interesting variable. */
14208 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14210 if (!rtx_equal_p (out, orig_out))
14211 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14213 return 1; /* DONE */
14217 * For comparison with above,
14227 if (! nonimmediate_operand (operands[2], mode))
14228 operands[2] = force_reg (mode, operands[2]);
14229 if (! nonimmediate_operand (operands[3], mode))
14230 operands[3] = force_reg (mode, operands[3]);
14232 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14234 rtx tmp = gen_reg_rtx (mode);
14235 emit_move_insn (tmp, operands[3]);
14238 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14240 rtx tmp = gen_reg_rtx (mode);
14241 emit_move_insn (tmp, operands[2]);
14245 if (! register_operand (operands[2], VOIDmode)
14247 || ! register_operand (operands[3], VOIDmode)))
14248 operands[2] = force_reg (mode, operands[2]);
14251 && ! register_operand (operands[3], VOIDmode))
14252 operands[3] = force_reg (mode, operands[3]);
14254 emit_insn (compare_seq);
14255 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14256 gen_rtx_IF_THEN_ELSE (mode,
14257 compare_op, operands[2],
14260 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14261 gen_rtx_IF_THEN_ELSE (mode,
14263 copy_rtx (operands[3]),
14264 copy_rtx (operands[0]))));
14266 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14267 gen_rtx_IF_THEN_ELSE (mode,
14269 copy_rtx (operands[2]),
14270 copy_rtx (operands[0]))));
14272 return 1; /* DONE */
14275 /* Swap, force into registers, or otherwise massage the two operands
14276 to an sse comparison with a mask result. Thus we differ a bit from
14277 ix86_prepare_fp_compare_args which expects to produce a flags result.
14279 The DEST operand exists to help determine whether to commute commutative
14280 operators. The POP0/POP1 operands are updated in place. The new
14281 comparison code is returned, or UNKNOWN if not implementable. */
14283 static enum rtx_code
14284 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14285 rtx *pop0, rtx *pop1)
14293 /* We have no LTGT as an operator. We could implement it with
14294 NE & ORDERED, but this requires an extra temporary. It's
14295 not clear that it's worth it. */
14302 /* These are supported directly. */
14309 /* For commutative operators, try to canonicalize the destination
14310 operand to be first in the comparison - this helps reload to
14311 avoid extra moves. */
14312 if (!dest || !rtx_equal_p (dest, *pop1))
14320 /* These are not supported directly. Swap the comparison operands
14321 to transform into something that is supported. */
14325 code = swap_condition (code);
14329 gcc_unreachable ();
14335 /* Detect conditional moves that exactly match min/max operational
14336 semantics. Note that this is IEEE safe, as long as we don't
14337 interchange the operands.
14339 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14340 and TRUE if the operation is successful and instructions are emitted. */
14343 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14344 rtx cmp_op1, rtx if_true, rtx if_false)
14346 enum machine_mode mode;
14352 else if (code == UNGE)
14355 if_true = if_false;
14361 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14363 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14368 mode = GET_MODE (dest);
14370 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14371 but MODE may be a vector mode and thus not appropriate. */
14372 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
14374 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14377 if_true = force_reg (mode, if_true);
14378 v = gen_rtvec (2, if_true, if_false);
14379 tmp = gen_rtx_UNSPEC (mode, v, u);
14383 code = is_min ? SMIN : SMAX;
14384 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14387 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14391 /* Expand an sse vector comparison. Return the register with the result. */
14394 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14395 rtx op_true, rtx op_false)
14397 enum machine_mode mode = GET_MODE (dest);
14400 cmp_op0 = force_reg (mode, cmp_op0);
14401 if (!nonimmediate_operand (cmp_op1, mode))
14402 cmp_op1 = force_reg (mode, cmp_op1);
14405 || reg_overlap_mentioned_p (dest, op_true)
14406 || reg_overlap_mentioned_p (dest, op_false))
14407 dest = gen_reg_rtx (mode);
14409 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14410 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14415 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
14416 operations. This is used for both scalar and vector conditional moves. */
14419 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
14421 enum machine_mode mode = GET_MODE (dest);
14424 if (op_false == CONST0_RTX (mode))
14426 op_true = force_reg (mode, op_true);
14427 x = gen_rtx_AND (mode, cmp, op_true);
14428 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14430 else if (op_true == CONST0_RTX (mode))
14432 op_false = force_reg (mode, op_false);
14433 x = gen_rtx_NOT (mode, cmp);
14434 x = gen_rtx_AND (mode, x, op_false);
14435 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14437 else if (TARGET_SSE5)
14439 rtx pcmov = gen_rtx_SET (mode, dest,
14440 gen_rtx_IF_THEN_ELSE (mode, cmp,
14447 op_true = force_reg (mode, op_true);
14448 op_false = force_reg (mode, op_false);
14450 t2 = gen_reg_rtx (mode);
14452 t3 = gen_reg_rtx (mode);
14456 x = gen_rtx_AND (mode, op_true, cmp);
14457 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
14459 x = gen_rtx_NOT (mode, cmp);
14460 x = gen_rtx_AND (mode, x, op_false);
14461 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
14463 x = gen_rtx_IOR (mode, t3, t2);
14464 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14468 /* Expand a floating-point conditional move. Return true if successful. */
14471 ix86_expand_fp_movcc (rtx operands[])
14473 enum machine_mode mode = GET_MODE (operands[0]);
14474 enum rtx_code code = GET_CODE (operands[1]);
14475 rtx tmp, compare_op, second_test, bypass_test;
14477 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
14479 enum machine_mode cmode;
14481 /* Since we've no cmove for sse registers, don't force bad register
14482 allocation just to gain access to it. Deny movcc when the
14483 comparison mode doesn't match the move mode. */
14484 cmode = GET_MODE (ix86_compare_op0);
14485 if (cmode == VOIDmode)
14486 cmode = GET_MODE (ix86_compare_op1);
14490 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14492 &ix86_compare_op1);
14493 if (code == UNKNOWN)
14496 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
14497 ix86_compare_op1, operands[2],
14501 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
14502 ix86_compare_op1, operands[2], operands[3]);
14503 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
14507 /* The floating point conditional move instructions don't directly
14508 support conditions resulting from a signed integer comparison. */
14510 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14512 /* The floating point conditional move instructions don't directly
14513 support signed integer comparisons. */
14515 if (!fcmov_comparison_operator (compare_op, VOIDmode))
14517 gcc_assert (!second_test && !bypass_test);
14518 tmp = gen_reg_rtx (QImode);
14519 ix86_expand_setcc (code, tmp);
14521 ix86_compare_op0 = tmp;
14522 ix86_compare_op1 = const0_rtx;
14523 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14525 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14527 tmp = gen_reg_rtx (mode);
14528 emit_move_insn (tmp, operands[3]);
14531 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14533 tmp = gen_reg_rtx (mode);
14534 emit_move_insn (tmp, operands[2]);
14538 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14539 gen_rtx_IF_THEN_ELSE (mode, compare_op,
14540 operands[2], operands[3])));
14542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14543 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
14544 operands[3], operands[0])));
14546 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14547 gen_rtx_IF_THEN_ELSE (mode, second_test,
14548 operands[2], operands[0])));
14553 /* Expand a floating-point vector conditional move; a vcond operation
14554 rather than a movcc operation. */
14557 ix86_expand_fp_vcond (rtx operands[])
14559 enum rtx_code code = GET_CODE (operands[3]);
14562 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
14563 &operands[4], &operands[5]);
14564 if (code == UNKNOWN)
14567 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
14568 operands[5], operands[1], operands[2]))
14571 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
14572 operands[1], operands[2]);
14573 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
14577 /* Expand a signed/unsigned integral vector conditional move. */
14580 ix86_expand_int_vcond (rtx operands[])
14582 enum machine_mode mode = GET_MODE (operands[0]);
14583 enum rtx_code code = GET_CODE (operands[3]);
14584 bool negate = false;
14587 cop0 = operands[4];
14588 cop1 = operands[5];
14590 /* SSE5 supports all of the comparisons on all vector int types. */
14593 /* Canonicalize the comparison to EQ, GT, GTU. */
14604 code = reverse_condition (code);
14610 code = reverse_condition (code);
14616 code = swap_condition (code);
14617 x = cop0, cop0 = cop1, cop1 = x;
14621 gcc_unreachable ();
14624 /* Only SSE4.1/SSE4.2 supports V2DImode. */
14625 if (mode == V2DImode)
14630 /* SSE4.1 supports EQ. */
14631 if (!TARGET_SSE4_1)
14637 /* SSE4.2 supports GT/GTU. */
14638 if (!TARGET_SSE4_2)
14643 gcc_unreachable ();
14647 /* Unsigned parallel compare is not supported by the hardware. Play some
14648 tricks to turn this into a signed comparison against 0. */
14651 cop0 = force_reg (mode, cop0);
14660 /* Perform a parallel modulo subtraction. */
14661 t1 = gen_reg_rtx (mode);
14662 emit_insn ((mode == V4SImode
14664 : gen_subv2di3) (t1, cop0, cop1));
14666 /* Extract the original sign bit of op0. */
14667 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
14669 t2 = gen_reg_rtx (mode);
14670 emit_insn ((mode == V4SImode
14672 : gen_andv2di3) (t2, cop0, mask));
14674 /* XOR it back into the result of the subtraction. This results
14675 in the sign bit set iff we saw unsigned underflow. */
14676 x = gen_reg_rtx (mode);
14677 emit_insn ((mode == V4SImode
14679 : gen_xorv2di3) (x, t1, t2));
14687 /* Perform a parallel unsigned saturating subtraction. */
14688 x = gen_reg_rtx (mode);
14689 emit_insn (gen_rtx_SET (VOIDmode, x,
14690 gen_rtx_US_MINUS (mode, cop0, cop1)));
14697 gcc_unreachable ();
14701 cop1 = CONST0_RTX (mode);
14705 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
14706 operands[1+negate], operands[2-negate]);
14708 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
14709 operands[2-negate]);
14713 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
14714 true if we should do zero extension, else sign extension. HIGH_P is
14715 true if we want the N/2 high elements, else the low elements. */
14718 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14720 enum machine_mode imode = GET_MODE (operands[1]);
14721 rtx (*unpack)(rtx, rtx, rtx);
14728 unpack = gen_vec_interleave_highv16qi;
14730 unpack = gen_vec_interleave_lowv16qi;
14734 unpack = gen_vec_interleave_highv8hi;
14736 unpack = gen_vec_interleave_lowv8hi;
14740 unpack = gen_vec_interleave_highv4si;
14742 unpack = gen_vec_interleave_lowv4si;
14745 gcc_unreachable ();
14748 dest = gen_lowpart (imode, operands[0]);
14751 se = force_reg (imode, CONST0_RTX (imode));
14753 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
14754 operands[1], pc_rtx, pc_rtx);
14756 emit_insn (unpack (dest, operands[1], se));
14759 /* This function performs the same task as ix86_expand_sse_unpack,
14760 but with SSE4.1 instructions. */
14763 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14765 enum machine_mode imode = GET_MODE (operands[1]);
14766 rtx (*unpack)(rtx, rtx);
14773 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
14775 unpack = gen_sse4_1_extendv8qiv8hi2;
14779 unpack = gen_sse4_1_zero_extendv4hiv4si2;
14781 unpack = gen_sse4_1_extendv4hiv4si2;
14785 unpack = gen_sse4_1_zero_extendv2siv2di2;
14787 unpack = gen_sse4_1_extendv2siv2di2;
14790 gcc_unreachable ();
14793 dest = operands[0];
14796 /* Shift higher 8 bytes to lower 8 bytes. */
14797 src = gen_reg_rtx (imode);
14798 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
14799 gen_lowpart (TImode, operands[1]),
14805 emit_insn (unpack (dest, src));
14808 /* This function performs the same task as ix86_expand_sse_unpack,
14809 but with sse5 instructions. */
14812 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
14814 enum machine_mode imode = GET_MODE (operands[1]);
14815 int pperm_bytes[16];
14817 int h = (high_p) ? 8 : 0;
14820 rtvec v = rtvec_alloc (16);
14823 rtx op0 = operands[0], op1 = operands[1];
14828 vs = rtvec_alloc (8);
14829 h2 = (high_p) ? 8 : 0;
14830 for (i = 0; i < 8; i++)
14832 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
14833 pperm_bytes[2*i+1] = ((unsigned_p)
14835 : PPERM_SIGN | PPERM_SRC2 | i | h);
14838 for (i = 0; i < 16; i++)
14839 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14841 for (i = 0; i < 8; i++)
14842 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14844 p = gen_rtx_PARALLEL (VOIDmode, vs);
14845 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14847 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
14849 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
14853 vs = rtvec_alloc (4);
14854 h2 = (high_p) ? 4 : 0;
14855 for (i = 0; i < 4; i++)
14857 sign_extend = ((unsigned_p)
14859 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
14860 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
14861 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
14862 pperm_bytes[4*i+2] = sign_extend;
14863 pperm_bytes[4*i+3] = sign_extend;
14866 for (i = 0; i < 16; i++)
14867 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14869 for (i = 0; i < 4; i++)
14870 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14872 p = gen_rtx_PARALLEL (VOIDmode, vs);
14873 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14875 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
14877 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
14881 vs = rtvec_alloc (2);
14882 h2 = (high_p) ? 2 : 0;
14883 for (i = 0; i < 2; i++)
14885 sign_extend = ((unsigned_p)
14887 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
14888 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
14889 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
14890 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
14891 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
14892 pperm_bytes[8*i+4] = sign_extend;
14893 pperm_bytes[8*i+5] = sign_extend;
14894 pperm_bytes[8*i+6] = sign_extend;
14895 pperm_bytes[8*i+7] = sign_extend;
14898 for (i = 0; i < 16; i++)
14899 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14901 for (i = 0; i < 2; i++)
14902 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14904 p = gen_rtx_PARALLEL (VOIDmode, vs);
14905 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14907 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
14909 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
14913 gcc_unreachable ();
14919 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
14920 next narrower integer vector type */
14922 ix86_expand_sse5_pack (rtx operands[3])
14924 enum machine_mode imode = GET_MODE (operands[0]);
14925 int pperm_bytes[16];
14927 rtvec v = rtvec_alloc (16);
14929 rtx op0 = operands[0];
14930 rtx op1 = operands[1];
14931 rtx op2 = operands[2];
14936 for (i = 0; i < 8; i++)
14938 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
14939 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
14942 for (i = 0; i < 16; i++)
14943 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14945 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14946 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
14950 for (i = 0; i < 4; i++)
14952 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
14953 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
14954 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
14955 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
14958 for (i = 0; i < 16; i++)
14959 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14961 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14962 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
14966 for (i = 0; i < 2; i++)
14968 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14969 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14970 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14971 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14972 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14973 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14974 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14975 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14978 for (i = 0; i < 16; i++)
14979 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14981 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14982 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14986 gcc_unreachable ();
14992 /* Expand conditional increment or decrement using adb/sbb instructions.
14993 The default case using setcc followed by the conditional move can be
14994 done by generic code. */
14996 ix86_expand_int_addcc (rtx operands[])
14998 enum rtx_code code = GET_CODE (operands[1]);
15000 rtx val = const0_rtx;
15001 bool fpcmp = false;
15002 enum machine_mode mode = GET_MODE (operands[0]);
15004 if (operands[3] != const1_rtx
15005 && operands[3] != constm1_rtx)
15007 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15008 ix86_compare_op1, &compare_op))
15010 code = GET_CODE (compare_op);
15012 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15013 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15016 code = ix86_fp_compare_code_to_integer (code);
15023 PUT_CODE (compare_op,
15024 reverse_condition_maybe_unordered
15025 (GET_CODE (compare_op)));
15027 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15029 PUT_MODE (compare_op, mode);
15031 /* Construct either adc or sbb insn. */
15032 if ((code == LTU) == (operands[3] == constm1_rtx))
15034 switch (GET_MODE (operands[0]))
15037 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15040 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15043 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15046 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15049 gcc_unreachable ();
15054 switch (GET_MODE (operands[0]))
15057 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15060 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15063 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15066 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15069 gcc_unreachable ();
15072 return 1; /* DONE */
15076 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15077 works for floating pointer parameters and nonoffsetable memories.
15078 For pushes, it returns just stack offsets; the values will be saved
15079 in the right order. Maximally three parts are generated. */
15082 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
15087 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15089 size = (GET_MODE_SIZE (mode) + 4) / 8;
15091 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15092 gcc_assert (size >= 2 && size <= 4);
15094 /* Optimize constant pool reference to immediates. This is used by fp
15095 moves, that force all constants to memory to allow combining. */
15096 if (MEM_P (operand) && MEM_READONLY_P (operand))
15098 rtx tmp = maybe_get_pool_constant (operand);
15103 if (MEM_P (operand) && !offsettable_memref_p (operand))
15105 /* The only non-offsetable memories we handle are pushes. */
15106 int ok = push_operand (operand, VOIDmode);
15110 operand = copy_rtx (operand);
15111 PUT_MODE (operand, Pmode);
15112 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15116 if (GET_CODE (operand) == CONST_VECTOR)
15118 enum machine_mode imode = int_mode_for_mode (mode);
15119 /* Caution: if we looked through a constant pool memory above,
15120 the operand may actually have a different mode now. That's
15121 ok, since we want to pun this all the way back to an integer. */
15122 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15123 gcc_assert (operand != NULL);
15129 if (mode == DImode)
15130 split_di (&operand, 1, &parts[0], &parts[1]);
15135 if (REG_P (operand))
15137 gcc_assert (reload_completed);
15138 for (i = 0; i < size; i++)
15139 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15141 else if (offsettable_memref_p (operand))
15143 operand = adjust_address (operand, SImode, 0);
15144 parts[0] = operand;
15145 for (i = 1; i < size; i++)
15146 parts[i] = adjust_address (operand, SImode, 4 * i);
15148 else if (GET_CODE (operand) == CONST_DOUBLE)
15153 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15157 real_to_target (l, &r, mode);
15158 parts[3] = gen_int_mode (l[3], SImode);
15159 parts[2] = gen_int_mode (l[2], SImode);
15162 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15163 parts[2] = gen_int_mode (l[2], SImode);
15166 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15169 gcc_unreachable ();
15171 parts[1] = gen_int_mode (l[1], SImode);
15172 parts[0] = gen_int_mode (l[0], SImode);
15175 gcc_unreachable ();
15180 if (mode == TImode)
15181 split_ti (&operand, 1, &parts[0], &parts[1]);
15182 if (mode == XFmode || mode == TFmode)
15184 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15185 if (REG_P (operand))
15187 gcc_assert (reload_completed);
15188 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15189 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15191 else if (offsettable_memref_p (operand))
15193 operand = adjust_address (operand, DImode, 0);
15194 parts[0] = operand;
15195 parts[1] = adjust_address (operand, upper_mode, 8);
15197 else if (GET_CODE (operand) == CONST_DOUBLE)
15202 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15203 real_to_target (l, &r, mode);
15205 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15206 if (HOST_BITS_PER_WIDE_INT >= 64)
15209 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15210 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15213 parts[0] = immed_double_const (l[0], l[1], DImode);
15215 if (upper_mode == SImode)
15216 parts[1] = gen_int_mode (l[2], SImode);
15217 else if (HOST_BITS_PER_WIDE_INT >= 64)
15220 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15221 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15224 parts[1] = immed_double_const (l[2], l[3], DImode);
15227 gcc_unreachable ();
15234 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15235 Return false when normal moves are needed; true when all required
15236 insns have been emitted. Operands 2-4 contain the input values
15237 int the correct order; operands 5-7 contain the output values. */
15240 ix86_split_long_move (rtx operands[])
15245 int collisions = 0;
15246 enum machine_mode mode = GET_MODE (operands[0]);
15247 bool collisionparts[4];
15249 /* The DFmode expanders may ask us to move double.
15250 For 64bit target this is single move. By hiding the fact
15251 here we simplify i386.md splitters. */
15252 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15254 /* Optimize constant pool reference to immediates. This is used by
15255 fp moves, that force all constants to memory to allow combining. */
15257 if (MEM_P (operands[1])
15258 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15259 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15260 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15261 if (push_operand (operands[0], VOIDmode))
15263 operands[0] = copy_rtx (operands[0]);
15264 PUT_MODE (operands[0], Pmode);
15267 operands[0] = gen_lowpart (DImode, operands[0]);
15268 operands[1] = gen_lowpart (DImode, operands[1]);
15269 emit_move_insn (operands[0], operands[1]);
15273 /* The only non-offsettable memory we handle is push. */
15274 if (push_operand (operands[0], VOIDmode))
15277 gcc_assert (!MEM_P (operands[0])
15278 || offsettable_memref_p (operands[0]));
15280 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15281 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15283 /* When emitting push, take care for source operands on the stack. */
15284 if (push && MEM_P (operands[1])
15285 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15286 for (i = 0; i < nparts - 1; i++)
15287 part[1][i] = change_address (part[1][i],
15288 GET_MODE (part[1][i]),
15289 XEXP (part[1][i + 1], 0));
15291 /* We need to do copy in the right order in case an address register
15292 of the source overlaps the destination. */
15293 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15297 for (i = 0; i < nparts; i++)
15300 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15301 if (collisionparts[i])
15305 /* Collision in the middle part can be handled by reordering. */
15306 if (collisions == 1 && nparts == 3 && collisionparts [1])
15308 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15309 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15311 else if (collisions == 1
15313 && (collisionparts [1] || collisionparts [2]))
15315 if (collisionparts [1])
15317 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15318 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15322 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15323 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15327 /* If there are more collisions, we can't handle it by reordering.
15328 Do an lea to the last part and use only one colliding move. */
15329 else if (collisions > 1)
15335 base = part[0][nparts - 1];
15337 /* Handle the case when the last part isn't valid for lea.
15338 Happens in 64-bit mode storing the 12-byte XFmode. */
15339 if (GET_MODE (base) != Pmode)
15340 base = gen_rtx_REG (Pmode, REGNO (base));
15342 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15343 part[1][0] = replace_equiv_address (part[1][0], base);
15344 for (i = 1; i < nparts; i++)
15346 tmp = plus_constant (base, UNITS_PER_WORD * i);
15347 part[1][i] = replace_equiv_address (part[1][i], tmp);
15358 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15359 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15360 emit_move_insn (part[0][2], part[1][2]);
15362 else if (nparts == 4)
15364 emit_move_insn (part[0][3], part[1][3]);
15365 emit_move_insn (part[0][2], part[1][2]);
15370 /* In 64bit mode we don't have 32bit push available. In case this is
15371 register, it is OK - we will just use larger counterpart. We also
15372 retype memory - these comes from attempt to avoid REX prefix on
15373 moving of second half of TFmode value. */
15374 if (GET_MODE (part[1][1]) == SImode)
15376 switch (GET_CODE (part[1][1]))
15379 part[1][1] = adjust_address (part[1][1], DImode, 0);
15383 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15387 gcc_unreachable ();
15390 if (GET_MODE (part[1][0]) == SImode)
15391 part[1][0] = part[1][1];
15394 emit_move_insn (part[0][1], part[1][1]);
15395 emit_move_insn (part[0][0], part[1][0]);
15399 /* Choose correct order to not overwrite the source before it is copied. */
15400 if ((REG_P (part[0][0])
15401 && REG_P (part[1][1])
15402 && (REGNO (part[0][0]) == REGNO (part[1][1])
15404 && REGNO (part[0][0]) == REGNO (part[1][2]))
15406 && REGNO (part[0][0]) == REGNO (part[1][3]))))
15408 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
15410 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15412 operands[2 + i] = part[0][j];
15413 operands[6 + i] = part[1][j];
15418 for (i = 0; i < nparts; i++)
15420 operands[2 + i] = part[0][i];
15421 operands[6 + i] = part[1][i];
15425 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
15428 for (j = 0; j < nparts - 1; j++)
15429 if (CONST_INT_P (operands[6 + j])
15430 && operands[6 + j] != const0_rtx
15431 && REG_P (operands[2 + j]))
15432 for (i = j; i < nparts - 1; i++)
15433 if (CONST_INT_P (operands[7 + i])
15434 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
15435 operands[7 + i] = operands[2 + j];
15438 for (i = 0; i < nparts; i++)
15439 emit_move_insn (operands[2 + i], operands[6 + i]);
15444 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
15445 left shift by a constant, either using a single shift or
15446 a sequence of add instructions. */
15449 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
15453 emit_insn ((mode == DImode
15455 : gen_adddi3) (operand, operand, operand));
15457 else if (!optimize_size
15458 && count * ix86_cost->add <= ix86_cost->shift_const)
15461 for (i=0; i<count; i++)
15463 emit_insn ((mode == DImode
15465 : gen_adddi3) (operand, operand, operand));
15469 emit_insn ((mode == DImode
15471 : gen_ashldi3) (operand, operand, GEN_INT (count)));
15475 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
15477 rtx low[2], high[2];
15479 const int single_width = mode == DImode ? 32 : 64;
15481 if (CONST_INT_P (operands[2]))
15483 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15484 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15486 if (count >= single_width)
15488 emit_move_insn (high[0], low[1]);
15489 emit_move_insn (low[0], const0_rtx);
15491 if (count > single_width)
15492 ix86_expand_ashl_const (high[0], count - single_width, mode);
15496 if (!rtx_equal_p (operands[0], operands[1]))
15497 emit_move_insn (operands[0], operands[1]);
15498 emit_insn ((mode == DImode
15500 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
15501 ix86_expand_ashl_const (low[0], count, mode);
15506 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15508 if (operands[1] == const1_rtx)
15510 /* Assuming we've chosen a QImode capable registers, then 1 << N
15511 can be done with two 32/64-bit shifts, no branches, no cmoves. */
15512 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
15514 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
15516 ix86_expand_clear (low[0]);
15517 ix86_expand_clear (high[0]);
15518 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
15520 d = gen_lowpart (QImode, low[0]);
15521 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15522 s = gen_rtx_EQ (QImode, flags, const0_rtx);
15523 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15525 d = gen_lowpart (QImode, high[0]);
15526 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
15527 s = gen_rtx_NE (QImode, flags, const0_rtx);
15528 emit_insn (gen_rtx_SET (VOIDmode, d, s));
15531 /* Otherwise, we can get the same results by manually performing
15532 a bit extract operation on bit 5/6, and then performing the two
15533 shifts. The two methods of getting 0/1 into low/high are exactly
15534 the same size. Avoiding the shift in the bit extract case helps
15535 pentium4 a bit; no one else seems to care much either way. */
15540 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
15541 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
15543 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
15544 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
15546 emit_insn ((mode == DImode
15548 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
15549 emit_insn ((mode == DImode
15551 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
15552 emit_move_insn (low[0], high[0]);
15553 emit_insn ((mode == DImode
15555 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
15558 emit_insn ((mode == DImode
15560 : gen_ashldi3) (low[0], low[0], operands[2]));
15561 emit_insn ((mode == DImode
15563 : gen_ashldi3) (high[0], high[0], operands[2]));
15567 if (operands[1] == constm1_rtx)
15569 /* For -1 << N, we can avoid the shld instruction, because we
15570 know that we're shifting 0...31/63 ones into a -1. */
15571 emit_move_insn (low[0], constm1_rtx);
15573 emit_move_insn (high[0], low[0]);
15575 emit_move_insn (high[0], constm1_rtx);
15579 if (!rtx_equal_p (operands[0], operands[1]))
15580 emit_move_insn (operands[0], operands[1]);
15582 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15583 emit_insn ((mode == DImode
15585 : gen_x86_64_shld) (high[0], low[0], operands[2]));
15588 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
15590 if (TARGET_CMOVE && scratch)
15592 ix86_expand_clear (scratch);
15593 emit_insn ((mode == DImode
15594 ? gen_x86_shift_adj_1
15595 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
15599 emit_insn ((mode == DImode
15600 ? gen_x86_shift_adj_2
15601 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
15605 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
15607 rtx low[2], high[2];
15609 const int single_width = mode == DImode ? 32 : 64;
15611 if (CONST_INT_P (operands[2]))
15613 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15614 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15616 if (count == single_width * 2 - 1)
15618 emit_move_insn (high[0], high[1]);
15619 emit_insn ((mode == DImode
15621 : gen_ashrdi3) (high[0], high[0],
15622 GEN_INT (single_width - 1)));
15623 emit_move_insn (low[0], high[0]);
15626 else if (count >= single_width)
15628 emit_move_insn (low[0], high[1]);
15629 emit_move_insn (high[0], low[0]);
15630 emit_insn ((mode == DImode
15632 : gen_ashrdi3) (high[0], high[0],
15633 GEN_INT (single_width - 1)));
15634 if (count > single_width)
15635 emit_insn ((mode == DImode
15637 : gen_ashrdi3) (low[0], low[0],
15638 GEN_INT (count - single_width)));
15642 if (!rtx_equal_p (operands[0], operands[1]))
15643 emit_move_insn (operands[0], operands[1]);
15644 emit_insn ((mode == DImode
15646 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15647 emit_insn ((mode == DImode
15649 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
15654 if (!rtx_equal_p (operands[0], operands[1]))
15655 emit_move_insn (operands[0], operands[1]);
15657 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15659 emit_insn ((mode == DImode
15661 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15662 emit_insn ((mode == DImode
15664 : gen_ashrdi3) (high[0], high[0], operands[2]));
15666 if (TARGET_CMOVE && scratch)
15668 emit_move_insn (scratch, high[0]);
15669 emit_insn ((mode == DImode
15671 : gen_ashrdi3) (scratch, scratch,
15672 GEN_INT (single_width - 1)));
15673 emit_insn ((mode == DImode
15674 ? gen_x86_shift_adj_1
15675 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15679 emit_insn ((mode == DImode
15680 ? gen_x86_shift_adj_3
15681 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
15686 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
15688 rtx low[2], high[2];
15690 const int single_width = mode == DImode ? 32 : 64;
15692 if (CONST_INT_P (operands[2]))
15694 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
15695 count = INTVAL (operands[2]) & (single_width * 2 - 1);
15697 if (count >= single_width)
15699 emit_move_insn (low[0], high[1]);
15700 ix86_expand_clear (high[0]);
15702 if (count > single_width)
15703 emit_insn ((mode == DImode
15705 : gen_lshrdi3) (low[0], low[0],
15706 GEN_INT (count - single_width)));
15710 if (!rtx_equal_p (operands[0], operands[1]))
15711 emit_move_insn (operands[0], operands[1]);
15712 emit_insn ((mode == DImode
15714 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
15715 emit_insn ((mode == DImode
15717 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
15722 if (!rtx_equal_p (operands[0], operands[1]))
15723 emit_move_insn (operands[0], operands[1]);
15725 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
15727 emit_insn ((mode == DImode
15729 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
15730 emit_insn ((mode == DImode
15732 : gen_lshrdi3) (high[0], high[0], operands[2]));
15734 /* Heh. By reversing the arguments, we can reuse this pattern. */
15735 if (TARGET_CMOVE && scratch)
15737 ix86_expand_clear (scratch);
15738 emit_insn ((mode == DImode
15739 ? gen_x86_shift_adj_1
15740 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
15744 emit_insn ((mode == DImode
15745 ? gen_x86_shift_adj_2
15746 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
15750 /* Predict just emitted jump instruction to be taken with probability PROB. */
15752 predict_jump (int prob)
15754 rtx insn = get_last_insn ();
15755 gcc_assert (JUMP_P (insn));
15757 = gen_rtx_EXPR_LIST (REG_BR_PROB,
15762 /* Helper function for the string operations below. Dest VARIABLE whether
15763 it is aligned to VALUE bytes. If true, jump to the label. */
15765 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
15767 rtx label = gen_label_rtx ();
15768 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
15769 if (GET_MODE (variable) == DImode)
15770 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
15772 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
15773 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
15776 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15778 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15782 /* Adjust COUNTER by the VALUE. */
15784 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
15786 if (GET_MODE (countreg) == DImode)
15787 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
15789 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
15792 /* Zero extend possibly SImode EXP to Pmode register. */
15794 ix86_zero_extend_to_Pmode (rtx exp)
15797 if (GET_MODE (exp) == VOIDmode)
15798 return force_reg (Pmode, exp);
15799 if (GET_MODE (exp) == Pmode)
15800 return copy_to_mode_reg (Pmode, exp);
15801 r = gen_reg_rtx (Pmode);
15802 emit_insn (gen_zero_extendsidi2 (r, exp));
15806 /* Divide COUNTREG by SCALE. */
15808 scale_counter (rtx countreg, int scale)
15811 rtx piece_size_mask;
15815 if (CONST_INT_P (countreg))
15816 return GEN_INT (INTVAL (countreg) / scale);
15817 gcc_assert (REG_P (countreg));
15819 piece_size_mask = GEN_INT (scale - 1);
15820 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
15821 GEN_INT (exact_log2 (scale)),
15822 NULL, 1, OPTAB_DIRECT);
15826 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
15827 DImode for constant loop counts. */
15829 static enum machine_mode
15830 counter_mode (rtx count_exp)
15832 if (GET_MODE (count_exp) != VOIDmode)
15833 return GET_MODE (count_exp);
15834 if (GET_CODE (count_exp) != CONST_INT)
15836 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
15841 /* When SRCPTR is non-NULL, output simple loop to move memory
15842 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
15843 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
15844 equivalent loop to set memory by VALUE (supposed to be in MODE).
15846 The size is rounded down to whole number of chunk size moved at once.
15847 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
15851 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
15852 rtx destptr, rtx srcptr, rtx value,
15853 rtx count, enum machine_mode mode, int unroll,
15856 rtx out_label, top_label, iter, tmp;
15857 enum machine_mode iter_mode = counter_mode (count);
15858 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
15859 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
15865 top_label = gen_label_rtx ();
15866 out_label = gen_label_rtx ();
15867 iter = gen_reg_rtx (iter_mode);
15869 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
15870 NULL, 1, OPTAB_DIRECT);
15871 /* Those two should combine. */
15872 if (piece_size == const1_rtx)
15874 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
15876 predict_jump (REG_BR_PROB_BASE * 10 / 100);
15878 emit_move_insn (iter, const0_rtx);
15880 emit_label (top_label);
15882 tmp = convert_modes (Pmode, iter_mode, iter, true);
15883 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
15884 destmem = change_address (destmem, mode, x_addr);
15888 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
15889 srcmem = change_address (srcmem, mode, y_addr);
15891 /* When unrolling for chips that reorder memory reads and writes,
15892 we can save registers by using single temporary.
15893 Also using 4 temporaries is overkill in 32bit mode. */
15894 if (!TARGET_64BIT && 0)
15896 for (i = 0; i < unroll; i++)
15901 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15903 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15905 emit_move_insn (destmem, srcmem);
15911 gcc_assert (unroll <= 4);
15912 for (i = 0; i < unroll; i++)
15914 tmpreg[i] = gen_reg_rtx (mode);
15918 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15920 emit_move_insn (tmpreg[i], srcmem);
15922 for (i = 0; i < unroll; i++)
15927 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15929 emit_move_insn (destmem, tmpreg[i]);
15934 for (i = 0; i < unroll; i++)
15938 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15939 emit_move_insn (destmem, value);
15942 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
15943 true, OPTAB_LIB_WIDEN);
15945 emit_move_insn (iter, tmp);
15947 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
15949 if (expected_size != -1)
15951 expected_size /= GET_MODE_SIZE (mode) * unroll;
15952 if (expected_size == 0)
15954 else if (expected_size > REG_BR_PROB_BASE)
15955 predict_jump (REG_BR_PROB_BASE - 1);
15957 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
15960 predict_jump (REG_BR_PROB_BASE * 80 / 100);
15961 iter = ix86_zero_extend_to_Pmode (iter);
15962 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
15963 true, OPTAB_LIB_WIDEN);
15964 if (tmp != destptr)
15965 emit_move_insn (destptr, tmp);
15968 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
15969 true, OPTAB_LIB_WIDEN);
15971 emit_move_insn (srcptr, tmp);
15973 emit_label (out_label);
15976 /* Output "rep; mov" instruction.
15977 Arguments have same meaning as for previous function */
15979 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15980 rtx destptr, rtx srcptr,
15982 enum machine_mode mode)
15988 /* If the size is known, it is shorter to use rep movs. */
15989 if (mode == QImode && CONST_INT_P (count)
15990 && !(INTVAL (count) & 3))
15993 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15994 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15995 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15996 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15997 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15998 if (mode != QImode)
16000 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16001 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16002 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16003 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16004 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16005 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16009 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16010 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16012 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16016 /* Output "rep; stos" instruction.
16017 Arguments have same meaning as for previous function */
16019 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16021 enum machine_mode mode)
16026 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16027 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16028 value = force_reg (mode, gen_lowpart (mode, value));
16029 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16030 if (mode != QImode)
16032 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16033 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16034 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16037 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16038 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
16042 emit_strmov (rtx destmem, rtx srcmem,
16043 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16045 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16046 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16047 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16050 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16052 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16053 rtx destptr, rtx srcptr, rtx count, int max_size)
16056 if (CONST_INT_P (count))
16058 HOST_WIDE_INT countval = INTVAL (count);
16061 if ((countval & 0x10) && max_size > 16)
16065 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16066 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16069 gcc_unreachable ();
16072 if ((countval & 0x08) && max_size > 8)
16075 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16078 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16079 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16083 if ((countval & 0x04) && max_size > 4)
16085 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16088 if ((countval & 0x02) && max_size > 2)
16090 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16093 if ((countval & 0x01) && max_size > 1)
16095 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
16102 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16103 count, 1, OPTAB_DIRECT);
16104 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16105 count, QImode, 1, 4);
16109 /* When there are stringops, we can cheaply increase dest and src pointers.
16110 Otherwise we save code size by maintaining offset (zero is readily
16111 available from preceding rep operation) and using x86 addressing modes.
16113 if (TARGET_SINGLE_STRINGOP)
16117 rtx label = ix86_expand_aligntest (count, 4, true);
16118 src = change_address (srcmem, SImode, srcptr);
16119 dest = change_address (destmem, SImode, destptr);
16120 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16121 emit_label (label);
16122 LABEL_NUSES (label) = 1;
16126 rtx label = ix86_expand_aligntest (count, 2, true);
16127 src = change_address (srcmem, HImode, srcptr);
16128 dest = change_address (destmem, HImode, destptr);
16129 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16130 emit_label (label);
16131 LABEL_NUSES (label) = 1;
16135 rtx label = ix86_expand_aligntest (count, 1, true);
16136 src = change_address (srcmem, QImode, srcptr);
16137 dest = change_address (destmem, QImode, destptr);
16138 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16139 emit_label (label);
16140 LABEL_NUSES (label) = 1;
16145 rtx offset = force_reg (Pmode, const0_rtx);
16150 rtx label = ix86_expand_aligntest (count, 4, true);
16151 src = change_address (srcmem, SImode, srcptr);
16152 dest = change_address (destmem, SImode, destptr);
16153 emit_move_insn (dest, src);
16154 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16155 true, OPTAB_LIB_WIDEN);
16157 emit_move_insn (offset, tmp);
16158 emit_label (label);
16159 LABEL_NUSES (label) = 1;
16163 rtx label = ix86_expand_aligntest (count, 2, true);
16164 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16165 src = change_address (srcmem, HImode, tmp);
16166 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16167 dest = change_address (destmem, HImode, tmp);
16168 emit_move_insn (dest, src);
16169 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16170 true, OPTAB_LIB_WIDEN);
16172 emit_move_insn (offset, tmp);
16173 emit_label (label);
16174 LABEL_NUSES (label) = 1;
16178 rtx label = ix86_expand_aligntest (count, 1, true);
16179 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16180 src = change_address (srcmem, QImode, tmp);
16181 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16182 dest = change_address (destmem, QImode, tmp);
16183 emit_move_insn (dest, src);
16184 emit_label (label);
16185 LABEL_NUSES (label) = 1;
16190 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16192 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16193 rtx count, int max_size)
16196 expand_simple_binop (counter_mode (count), AND, count,
16197 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16198 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16199 gen_lowpart (QImode, value), count, QImode,
16203 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16205 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16209 if (CONST_INT_P (count))
16211 HOST_WIDE_INT countval = INTVAL (count);
16214 if ((countval & 0x10) && max_size > 16)
16218 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16219 emit_insn (gen_strset (destptr, dest, value));
16220 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16221 emit_insn (gen_strset (destptr, dest, value));
16224 gcc_unreachable ();
16227 if ((countval & 0x08) && max_size > 8)
16231 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16232 emit_insn (gen_strset (destptr, dest, value));
16236 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16237 emit_insn (gen_strset (destptr, dest, value));
16238 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16239 emit_insn (gen_strset (destptr, dest, value));
16243 if ((countval & 0x04) && max_size > 4)
16245 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16246 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16249 if ((countval & 0x02) && max_size > 2)
16251 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16252 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16255 if ((countval & 0x01) && max_size > 1)
16257 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16258 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16265 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
16270 rtx label = ix86_expand_aligntest (count, 16, true);
16273 dest = change_address (destmem, DImode, destptr);
16274 emit_insn (gen_strset (destptr, dest, value));
16275 emit_insn (gen_strset (destptr, dest, value));
16279 dest = change_address (destmem, SImode, destptr);
16280 emit_insn (gen_strset (destptr, dest, value));
16281 emit_insn (gen_strset (destptr, dest, value));
16282 emit_insn (gen_strset (destptr, dest, value));
16283 emit_insn (gen_strset (destptr, dest, value));
16285 emit_label (label);
16286 LABEL_NUSES (label) = 1;
16290 rtx label = ix86_expand_aligntest (count, 8, true);
16293 dest = change_address (destmem, DImode, destptr);
16294 emit_insn (gen_strset (destptr, dest, value));
16298 dest = change_address (destmem, SImode, destptr);
16299 emit_insn (gen_strset (destptr, dest, value));
16300 emit_insn (gen_strset (destptr, dest, value));
16302 emit_label (label);
16303 LABEL_NUSES (label) = 1;
16307 rtx label = ix86_expand_aligntest (count, 4, true);
16308 dest = change_address (destmem, SImode, destptr);
16309 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16310 emit_label (label);
16311 LABEL_NUSES (label) = 1;
16315 rtx label = ix86_expand_aligntest (count, 2, true);
16316 dest = change_address (destmem, HImode, destptr);
16317 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16318 emit_label (label);
16319 LABEL_NUSES (label) = 1;
16323 rtx label = ix86_expand_aligntest (count, 1, true);
16324 dest = change_address (destmem, QImode, destptr);
16325 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16326 emit_label (label);
16327 LABEL_NUSES (label) = 1;
16331 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
16332 DESIRED_ALIGNMENT. */
16334 expand_movmem_prologue (rtx destmem, rtx srcmem,
16335 rtx destptr, rtx srcptr, rtx count,
16336 int align, int desired_alignment)
16338 if (align <= 1 && desired_alignment > 1)
16340 rtx label = ix86_expand_aligntest (destptr, 1, false);
16341 srcmem = change_address (srcmem, QImode, srcptr);
16342 destmem = change_address (destmem, QImode, destptr);
16343 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16344 ix86_adjust_counter (count, 1);
16345 emit_label (label);
16346 LABEL_NUSES (label) = 1;
16348 if (align <= 2 && desired_alignment > 2)
16350 rtx label = ix86_expand_aligntest (destptr, 2, false);
16351 srcmem = change_address (srcmem, HImode, srcptr);
16352 destmem = change_address (destmem, HImode, destptr);
16353 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16354 ix86_adjust_counter (count, 2);
16355 emit_label (label);
16356 LABEL_NUSES (label) = 1;
16358 if (align <= 4 && desired_alignment > 4)
16360 rtx label = ix86_expand_aligntest (destptr, 4, false);
16361 srcmem = change_address (srcmem, SImode, srcptr);
16362 destmem = change_address (destmem, SImode, destptr);
16363 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16364 ix86_adjust_counter (count, 4);
16365 emit_label (label);
16366 LABEL_NUSES (label) = 1;
16368 gcc_assert (desired_alignment <= 8);
16371 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
16372 DESIRED_ALIGNMENT. */
16374 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16375 int align, int desired_alignment)
16377 if (align <= 1 && desired_alignment > 1)
16379 rtx label = ix86_expand_aligntest (destptr, 1, false);
16380 destmem = change_address (destmem, QImode, destptr);
16381 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16382 ix86_adjust_counter (count, 1);
16383 emit_label (label);
16384 LABEL_NUSES (label) = 1;
16386 if (align <= 2 && desired_alignment > 2)
16388 rtx label = ix86_expand_aligntest (destptr, 2, false);
16389 destmem = change_address (destmem, HImode, destptr);
16390 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16391 ix86_adjust_counter (count, 2);
16392 emit_label (label);
16393 LABEL_NUSES (label) = 1;
16395 if (align <= 4 && desired_alignment > 4)
16397 rtx label = ix86_expand_aligntest (destptr, 4, false);
16398 destmem = change_address (destmem, SImode, destptr);
16399 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16400 ix86_adjust_counter (count, 4);
16401 emit_label (label);
16402 LABEL_NUSES (label) = 1;
16404 gcc_assert (desired_alignment <= 8);
16407 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
16408 static enum stringop_alg
16409 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16410 int *dynamic_check)
16412 const struct stringop_algs * algs;
16413 /* Algorithms using the rep prefix want at least edi and ecx;
16414 additionally, memset wants eax and memcpy wants esi. Don't
16415 consider such algorithms if the user has appropriated those
16416 registers for their own purposes. */
16417 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
16419 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
16421 #define ALG_USABLE_P(alg) (rep_prefix_usable \
16422 || (alg != rep_prefix_1_byte \
16423 && alg != rep_prefix_4_byte \
16424 && alg != rep_prefix_8_byte))
16426 *dynamic_check = -1;
16428 algs = &ix86_cost->memset[TARGET_64BIT != 0];
16430 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
16431 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
16432 return stringop_alg;
16433 /* rep; movq or rep; movl is the smallest variant. */
16434 else if (optimize_size)
16436 if (!count || (count & 3))
16437 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
16439 return rep_prefix_usable ? rep_prefix_4_byte : loop;
16441 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
16443 else if (expected_size != -1 && expected_size < 4)
16444 return loop_1_byte;
16445 else if (expected_size != -1)
16448 enum stringop_alg alg = libcall;
16449 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
16451 /* We get here if the algorithms that were not libcall-based
16452 were rep-prefix based and we are unable to use rep prefixes
16453 based on global register usage. Break out of the loop and
16454 use the heuristic below. */
16455 if (algs->size[i].max == 0)
16457 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
16459 enum stringop_alg candidate = algs->size[i].alg;
16461 if (candidate != libcall && ALG_USABLE_P (candidate))
16463 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
16464 last non-libcall inline algorithm. */
16465 if (TARGET_INLINE_ALL_STRINGOPS)
16467 /* When the current size is best to be copied by a libcall,
16468 but we are still forced to inline, run the heuristic below
16469 that will pick code for medium sized blocks. */
16470 if (alg != libcall)
16474 else if (ALG_USABLE_P (candidate))
16478 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
16480 /* When asked to inline the call anyway, try to pick meaningful choice.
16481 We look for maximal size of block that is faster to copy by hand and
16482 take blocks of at most of that size guessing that average size will
16483 be roughly half of the block.
16485 If this turns out to be bad, we might simply specify the preferred
16486 choice in ix86_costs. */
16487 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16488 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
16491 enum stringop_alg alg;
16493 bool any_alg_usable_p = true;
16495 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
16497 enum stringop_alg candidate = algs->size[i].alg;
16498 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
16500 if (candidate != libcall && candidate
16501 && ALG_USABLE_P (candidate))
16502 max = algs->size[i].max;
16504 /* If there aren't any usable algorithms, then recursing on
16505 smaller sizes isn't going to find anything. Just return the
16506 simple byte-at-a-time copy loop. */
16507 if (!any_alg_usable_p)
16509 /* Pick something reasonable. */
16510 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16511 *dynamic_check = 128;
16512 return loop_1_byte;
16516 alg = decide_alg (count, max / 2, memset, dynamic_check);
16517 gcc_assert (*dynamic_check == -1);
16518 gcc_assert (alg != libcall);
16519 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
16520 *dynamic_check = max;
16523 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
16524 #undef ALG_USABLE_P
16527 /* Decide on alignment. We know that the operand is already aligned to ALIGN
16528 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
16530 decide_alignment (int align,
16531 enum stringop_alg alg,
16534 int desired_align = 0;
16538 gcc_unreachable ();
16540 case unrolled_loop:
16541 desired_align = GET_MODE_SIZE (Pmode);
16543 case rep_prefix_8_byte:
16546 case rep_prefix_4_byte:
16547 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
16548 copying whole cacheline at once. */
16549 if (TARGET_PENTIUMPRO)
16554 case rep_prefix_1_byte:
16555 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
16556 copying whole cacheline at once. */
16557 if (TARGET_PENTIUMPRO)
16571 if (desired_align < align)
16572 desired_align = align;
16573 if (expected_size != -1 && expected_size < 4)
16574 desired_align = align;
16575 return desired_align;
16578 /* Return the smallest power of 2 greater than VAL. */
16580 smallest_pow2_greater_than (int val)
16588 /* Expand string move (memcpy) operation. Use i386 string operations when
16589 profitable. expand_setmem contains similar code. The code depends upon
16590 architecture, block size and alignment, but always has the same
16593 1) Prologue guard: Conditional that jumps up to epilogues for small
16594 blocks that can be handled by epilogue alone. This is faster but
16595 also needed for correctness, since prologue assume the block is larger
16596 than the desired alignment.
16598 Optional dynamic check for size and libcall for large
16599 blocks is emitted here too, with -minline-stringops-dynamically.
16601 2) Prologue: copy first few bytes in order to get destination aligned
16602 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
16603 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
16604 We emit either a jump tree on power of two sized blocks, or a byte loop.
16606 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
16607 with specified algorithm.
16609 4) Epilogue: code copying tail of the block that is too small to be
16610 handled by main body (or up to size guarded by prologue guard). */
16613 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
16614 rtx expected_align_exp, rtx expected_size_exp)
16620 rtx jump_around_label = NULL;
16621 HOST_WIDE_INT align = 1;
16622 unsigned HOST_WIDE_INT count = 0;
16623 HOST_WIDE_INT expected_size = -1;
16624 int size_needed = 0, epilogue_size_needed;
16625 int desired_align = 0;
16626 enum stringop_alg alg;
16629 if (CONST_INT_P (align_exp))
16630 align = INTVAL (align_exp);
16631 /* i386 can do misaligned access on reasonably increased cost. */
16632 if (CONST_INT_P (expected_align_exp)
16633 && INTVAL (expected_align_exp) > align)
16634 align = INTVAL (expected_align_exp);
16635 if (CONST_INT_P (count_exp))
16636 count = expected_size = INTVAL (count_exp);
16637 if (CONST_INT_P (expected_size_exp) && count == 0)
16638 expected_size = INTVAL (expected_size_exp);
16640 /* Make sure we don't need to care about overflow later on. */
16641 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16644 /* Step 0: Decide on preferred algorithm, desired alignment and
16645 size of chunks to be copied by main loop. */
16647 alg = decide_alg (count, expected_size, false, &dynamic_check);
16648 desired_align = decide_alignment (align, alg, expected_size);
16650 if (!TARGET_ALIGN_STRINGOPS)
16651 align = desired_align;
16653 if (alg == libcall)
16655 gcc_assert (alg != no_stringop);
16657 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
16658 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16659 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
16664 gcc_unreachable ();
16666 size_needed = GET_MODE_SIZE (Pmode);
16668 case unrolled_loop:
16669 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
16671 case rep_prefix_8_byte:
16674 case rep_prefix_4_byte:
16677 case rep_prefix_1_byte:
16683 epilogue_size_needed = size_needed;
16685 /* Step 1: Prologue guard. */
16687 /* Alignment code needs count to be in register. */
16688 if (CONST_INT_P (count_exp) && desired_align > align)
16689 count_exp = force_reg (counter_mode (count_exp), count_exp);
16690 gcc_assert (desired_align >= 1 && align >= 1);
16692 /* Ensure that alignment prologue won't copy past end of block. */
16693 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16695 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16696 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16697 Make sure it is power of 2. */
16698 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16700 if (CONST_INT_P (count_exp))
16702 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
16707 label = gen_label_rtx ();
16708 emit_cmp_and_jump_insns (count_exp,
16709 GEN_INT (epilogue_size_needed),
16710 LTU, 0, counter_mode (count_exp), 1, label);
16711 if (expected_size == -1 || expected_size < epilogue_size_needed)
16712 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16714 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16718 /* Emit code to decide on runtime whether library call or inline should be
16720 if (dynamic_check != -1)
16722 if (CONST_INT_P (count_exp))
16724 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
16726 emit_block_move_via_libcall (dst, src, count_exp, false);
16727 count_exp = const0_rtx;
16733 rtx hot_label = gen_label_rtx ();
16734 jump_around_label = gen_label_rtx ();
16735 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16736 LEU, 0, GET_MODE (count_exp), 1, hot_label);
16737 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16738 emit_block_move_via_libcall (dst, src, count_exp, false);
16739 emit_jump (jump_around_label);
16740 emit_label (hot_label);
16744 /* Step 2: Alignment prologue. */
16746 if (desired_align > align)
16748 /* Except for the first move in epilogue, we no longer know
16749 constant offset in aliasing info. It don't seems to worth
16750 the pain to maintain it for the first move, so throw away
16752 src = change_address (src, BLKmode, srcreg);
16753 dst = change_address (dst, BLKmode, destreg);
16754 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
16757 if (label && size_needed == 1)
16759 emit_label (label);
16760 LABEL_NUSES (label) = 1;
16764 /* Step 3: Main loop. */
16770 gcc_unreachable ();
16772 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16773 count_exp, QImode, 1, expected_size);
16776 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16777 count_exp, Pmode, 1, expected_size);
16779 case unrolled_loop:
16780 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
16781 registers for 4 temporaries anyway. */
16782 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
16783 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
16786 case rep_prefix_8_byte:
16787 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16790 case rep_prefix_4_byte:
16791 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16794 case rep_prefix_1_byte:
16795 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
16799 /* Adjust properly the offset of src and dest memory for aliasing. */
16800 if (CONST_INT_P (count_exp))
16802 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
16803 (count / size_needed) * size_needed);
16804 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16805 (count / size_needed) * size_needed);
16809 src = change_address (src, BLKmode, srcreg);
16810 dst = change_address (dst, BLKmode, destreg);
16813 /* Step 4: Epilogue to copy the remaining bytes. */
16817 /* When the main loop is done, COUNT_EXP might hold original count,
16818 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16819 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16820 bytes. Compensate if needed. */
16822 if (size_needed < epilogue_size_needed)
16825 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16826 GEN_INT (size_needed - 1), count_exp, 1,
16828 if (tmp != count_exp)
16829 emit_move_insn (count_exp, tmp);
16831 emit_label (label);
16832 LABEL_NUSES (label) = 1;
16835 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16836 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
16837 epilogue_size_needed);
16838 if (jump_around_label)
16839 emit_label (jump_around_label);
16843 /* Helper function for memcpy. For QImode value 0xXY produce
16844 0xXYXYXYXY of wide specified by MODE. This is essentially
16845 a * 0x10101010, but we can do slightly better than
16846 synth_mult by unwinding the sequence by hand on CPUs with
16849 promote_duplicated_reg (enum machine_mode mode, rtx val)
16851 enum machine_mode valmode = GET_MODE (val);
16853 int nops = mode == DImode ? 3 : 2;
16855 gcc_assert (mode == SImode || mode == DImode);
16856 if (val == const0_rtx)
16857 return copy_to_mode_reg (mode, const0_rtx);
16858 if (CONST_INT_P (val))
16860 HOST_WIDE_INT v = INTVAL (val) & 255;
16864 if (mode == DImode)
16865 v |= (v << 16) << 16;
16866 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
16869 if (valmode == VOIDmode)
16871 if (valmode != QImode)
16872 val = gen_lowpart (QImode, val);
16873 if (mode == QImode)
16875 if (!TARGET_PARTIAL_REG_STALL)
16877 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
16878 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
16879 <= (ix86_cost->shift_const + ix86_cost->add) * nops
16880 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
16882 rtx reg = convert_modes (mode, QImode, val, true);
16883 tmp = promote_duplicated_reg (mode, const1_rtx);
16884 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
16889 rtx reg = convert_modes (mode, QImode, val, true);
16891 if (!TARGET_PARTIAL_REG_STALL)
16892 if (mode == SImode)
16893 emit_insn (gen_movsi_insv_1 (reg, reg));
16895 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
16898 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
16899 NULL, 1, OPTAB_DIRECT);
16901 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16903 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
16904 NULL, 1, OPTAB_DIRECT);
16905 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16906 if (mode == SImode)
16908 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
16909 NULL, 1, OPTAB_DIRECT);
16910 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16915 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
16916 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
16917 alignment from ALIGN to DESIRED_ALIGN. */
16919 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
16924 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
16925 promoted_val = promote_duplicated_reg (DImode, val);
16926 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
16927 promoted_val = promote_duplicated_reg (SImode, val);
16928 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
16929 promoted_val = promote_duplicated_reg (HImode, val);
16931 promoted_val = val;
16933 return promoted_val;
16936 /* Expand string clear operation (bzero). Use i386 string operations when
16937 profitable. See expand_movmem comment for explanation of individual
16938 steps performed. */
16940 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
16941 rtx expected_align_exp, rtx expected_size_exp)
16946 rtx jump_around_label = NULL;
16947 HOST_WIDE_INT align = 1;
16948 unsigned HOST_WIDE_INT count = 0;
16949 HOST_WIDE_INT expected_size = -1;
16950 int size_needed = 0, epilogue_size_needed;
16951 int desired_align = 0;
16952 enum stringop_alg alg;
16953 rtx promoted_val = NULL;
16954 bool force_loopy_epilogue = false;
16957 if (CONST_INT_P (align_exp))
16958 align = INTVAL (align_exp);
16959 /* i386 can do misaligned access on reasonably increased cost. */
16960 if (CONST_INT_P (expected_align_exp)
16961 && INTVAL (expected_align_exp) > align)
16962 align = INTVAL (expected_align_exp);
16963 if (CONST_INT_P (count_exp))
16964 count = expected_size = INTVAL (count_exp);
16965 if (CONST_INT_P (expected_size_exp) && count == 0)
16966 expected_size = INTVAL (expected_size_exp);
16968 /* Make sure we don't need to care about overflow later on. */
16969 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16972 /* Step 0: Decide on preferred algorithm, desired alignment and
16973 size of chunks to be copied by main loop. */
16975 alg = decide_alg (count, expected_size, true, &dynamic_check);
16976 desired_align = decide_alignment (align, alg, expected_size);
16978 if (!TARGET_ALIGN_STRINGOPS)
16979 align = desired_align;
16981 if (alg == libcall)
16983 gcc_assert (alg != no_stringop);
16985 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16986 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16991 gcc_unreachable ();
16993 size_needed = GET_MODE_SIZE (Pmode);
16995 case unrolled_loop:
16996 size_needed = GET_MODE_SIZE (Pmode) * 4;
16998 case rep_prefix_8_byte:
17001 case rep_prefix_4_byte:
17004 case rep_prefix_1_byte:
17009 epilogue_size_needed = size_needed;
17011 /* Step 1: Prologue guard. */
17013 /* Alignment code needs count to be in register. */
17014 if (CONST_INT_P (count_exp) && desired_align > align)
17016 enum machine_mode mode = SImode;
17017 if (TARGET_64BIT && (count & ~0xffffffff))
17019 count_exp = force_reg (mode, count_exp);
17021 /* Do the cheap promotion to allow better CSE across the
17022 main loop and epilogue (ie one load of the big constant in the
17023 front of all code. */
17024 if (CONST_INT_P (val_exp))
17025 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17026 desired_align, align);
17027 /* Ensure that alignment prologue won't copy past end of block. */
17028 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17030 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17031 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17032 Make sure it is power of 2. */
17033 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17035 /* To improve performance of small blocks, we jump around the VAL
17036 promoting mode. This mean that if the promoted VAL is not constant,
17037 we might not use it in the epilogue and have to use byte
17039 if (epilogue_size_needed > 2 && !promoted_val)
17040 force_loopy_epilogue = true;
17041 label = gen_label_rtx ();
17042 emit_cmp_and_jump_insns (count_exp,
17043 GEN_INT (epilogue_size_needed),
17044 LTU, 0, counter_mode (count_exp), 1, label);
17045 if (GET_CODE (count_exp) == CONST_INT)
17047 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17048 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17050 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17052 if (dynamic_check != -1)
17054 rtx hot_label = gen_label_rtx ();
17055 jump_around_label = gen_label_rtx ();
17056 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17057 LEU, 0, counter_mode (count_exp), 1, hot_label);
17058 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17059 set_storage_via_libcall (dst, count_exp, val_exp, false);
17060 emit_jump (jump_around_label);
17061 emit_label (hot_label);
17064 /* Step 2: Alignment prologue. */
17066 /* Do the expensive promotion once we branched off the small blocks. */
17068 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17069 desired_align, align);
17070 gcc_assert (desired_align >= 1 && align >= 1);
17072 if (desired_align > align)
17074 /* Except for the first move in epilogue, we no longer know
17075 constant offset in aliasing info. It don't seems to worth
17076 the pain to maintain it for the first move, so throw away
17078 dst = change_address (dst, BLKmode, destreg);
17079 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17082 if (label && size_needed == 1)
17084 emit_label (label);
17085 LABEL_NUSES (label) = 1;
17089 /* Step 3: Main loop. */
17095 gcc_unreachable ();
17097 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17098 count_exp, QImode, 1, expected_size);
17101 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17102 count_exp, Pmode, 1, expected_size);
17104 case unrolled_loop:
17105 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17106 count_exp, Pmode, 4, expected_size);
17108 case rep_prefix_8_byte:
17109 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17112 case rep_prefix_4_byte:
17113 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17116 case rep_prefix_1_byte:
17117 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17121 /* Adjust properly the offset of src and dest memory for aliasing. */
17122 if (CONST_INT_P (count_exp))
17123 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17124 (count / size_needed) * size_needed);
17126 dst = change_address (dst, BLKmode, destreg);
17128 /* Step 4: Epilogue to copy the remaining bytes. */
17132 /* When the main loop is done, COUNT_EXP might hold original count,
17133 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17134 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17135 bytes. Compensate if needed. */
17137 if (size_needed < desired_align - align)
17140 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17141 GEN_INT (size_needed - 1), count_exp, 1,
17143 size_needed = desired_align - align + 1;
17144 if (tmp != count_exp)
17145 emit_move_insn (count_exp, tmp);
17147 emit_label (label);
17148 LABEL_NUSES (label) = 1;
17150 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17152 if (force_loopy_epilogue)
17153 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17156 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17159 if (jump_around_label)
17160 emit_label (jump_around_label);
17164 /* Expand the appropriate insns for doing strlen if not just doing
17167 out = result, initialized with the start address
17168 align_rtx = alignment of the address.
17169 scratch = scratch register, initialized with the startaddress when
17170 not aligned, otherwise undefined
17172 This is just the body. It needs the initializations mentioned above and
17173 some address computing at the end. These things are done in i386.md. */
17176 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17180 rtx align_2_label = NULL_RTX;
17181 rtx align_3_label = NULL_RTX;
17182 rtx align_4_label = gen_label_rtx ();
17183 rtx end_0_label = gen_label_rtx ();
17185 rtx tmpreg = gen_reg_rtx (SImode);
17186 rtx scratch = gen_reg_rtx (SImode);
17190 if (CONST_INT_P (align_rtx))
17191 align = INTVAL (align_rtx);
17193 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17195 /* Is there a known alignment and is it less than 4? */
17198 rtx scratch1 = gen_reg_rtx (Pmode);
17199 emit_move_insn (scratch1, out);
17200 /* Is there a known alignment and is it not 2? */
17203 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17204 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17206 /* Leave just the 3 lower bits. */
17207 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17208 NULL_RTX, 0, OPTAB_WIDEN);
17210 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17211 Pmode, 1, align_4_label);
17212 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17213 Pmode, 1, align_2_label);
17214 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17215 Pmode, 1, align_3_label);
17219 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17220 check if is aligned to 4 - byte. */
17222 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17223 NULL_RTX, 0, OPTAB_WIDEN);
17225 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17226 Pmode, 1, align_4_label);
17229 mem = change_address (src, QImode, out);
17231 /* Now compare the bytes. */
17233 /* Compare the first n unaligned byte on a byte per byte basis. */
17234 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17235 QImode, 1, end_0_label);
17237 /* Increment the address. */
17238 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17240 /* Not needed with an alignment of 2 */
17243 emit_label (align_2_label);
17245 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17248 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17250 emit_label (align_3_label);
17253 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17256 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17259 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17260 align this loop. It gives only huge programs, but does not help to
17262 emit_label (align_4_label);
17264 mem = change_address (src, SImode, out);
17265 emit_move_insn (scratch, mem);
17266 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17268 /* This formula yields a nonzero result iff one of the bytes is zero.
17269 This saves three branches inside loop and many cycles. */
17271 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17272 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17273 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17274 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17275 gen_int_mode (0x80808080, SImode)));
17276 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
17281 rtx reg = gen_reg_rtx (SImode);
17282 rtx reg2 = gen_reg_rtx (Pmode);
17283 emit_move_insn (reg, tmpreg);
17284 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17286 /* If zero is not in the first two bytes, move two bytes forward. */
17287 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17288 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17289 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17290 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17291 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17294 /* Emit lea manually to avoid clobbering of flags. */
17295 emit_insn (gen_rtx_SET (SImode, reg2,
17296 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17298 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17299 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17300 emit_insn (gen_rtx_SET (VOIDmode, out,
17301 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
17308 rtx end_2_label = gen_label_rtx ();
17309 /* Is zero in the first two bytes? */
17311 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17312 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17313 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17314 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17315 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17317 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17318 JUMP_LABEL (tmp) = end_2_label;
17320 /* Not in the first two. Move two bytes forward. */
17321 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17322 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17324 emit_label (end_2_label);
17328 /* Avoid branch in fixing the byte. */
17329 tmpreg = gen_lowpart (QImode, tmpreg);
17330 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17331 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17332 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17334 emit_label (end_0_label);
17337 /* Expand strlen. */
17340 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17342 rtx addr, scratch1, scratch2, scratch3, scratch4;
17344 /* The generic case of strlen expander is long. Avoid it's
17345 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
17347 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17348 && !TARGET_INLINE_ALL_STRINGOPS
17350 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17353 addr = force_reg (Pmode, XEXP (src, 0));
17354 scratch1 = gen_reg_rtx (Pmode);
17356 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17359 /* Well it seems that some optimizer does not combine a call like
17360 foo(strlen(bar), strlen(bar));
17361 when the move and the subtraction is done here. It does calculate
17362 the length just once when these instructions are done inside of
17363 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
17364 often used and I use one fewer register for the lifetime of
17365 output_strlen_unroll() this is better. */
17367 emit_move_insn (out, addr);
17369 ix86_expand_strlensi_unroll_1 (out, src, align);
17371 /* strlensi_unroll_1 returns the address of the zero at the end of
17372 the string, like memchr(), so compute the length by subtracting
17373 the start address. */
17374 emit_insn ((*ix86_gen_sub3) (out, out, addr));
17380 /* Can't use this if the user has appropriated eax, ecx, or edi. */
17381 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
17384 scratch2 = gen_reg_rtx (Pmode);
17385 scratch3 = gen_reg_rtx (Pmode);
17386 scratch4 = force_reg (Pmode, constm1_rtx);
17388 emit_move_insn (scratch3, addr);
17389 eoschar = force_reg (QImode, eoschar);
17391 src = replace_equiv_address_nv (src, scratch3);
17393 /* If .md starts supporting :P, this can be done in .md. */
17394 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
17395 scratch4), UNSPEC_SCAS);
17396 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
17397 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
17398 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
17403 /* For given symbol (function) construct code to compute address of it's PLT
17404 entry in large x86-64 PIC model. */
17406 construct_plt_address (rtx symbol)
17408 rtx tmp = gen_reg_rtx (Pmode);
17409 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
17411 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
17412 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
17414 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
17415 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
17420 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
17421 rtx callarg2 ATTRIBUTE_UNUSED,
17422 rtx pop, int sibcall)
17424 rtx use = NULL, call;
17426 if (pop == const0_rtx)
17428 gcc_assert (!TARGET_64BIT || !pop);
17430 if (TARGET_MACHO && !TARGET_64BIT)
17433 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
17434 fnaddr = machopic_indirect_call_target (fnaddr);
17439 /* Static functions and indirect calls don't need the pic register. */
17440 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
17441 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17442 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
17443 use_reg (&use, pic_offset_table_rtx);
17446 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
17448 rtx al = gen_rtx_REG (QImode, AX_REG);
17449 emit_move_insn (al, callarg2);
17450 use_reg (&use, al);
17453 if (ix86_cmodel == CM_LARGE_PIC
17454 && GET_CODE (fnaddr) == MEM
17455 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17456 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
17457 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
17458 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
17460 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17461 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17463 if (sibcall && TARGET_64BIT
17464 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
17467 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
17468 fnaddr = gen_rtx_REG (Pmode, R11_REG);
17469 emit_move_insn (fnaddr, addr);
17470 fnaddr = gen_rtx_MEM (QImode, fnaddr);
17473 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
17475 call = gen_rtx_SET (VOIDmode, retval, call);
17478 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
17479 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
17480 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
17483 call = emit_call_insn (call);
17485 CALL_INSN_FUNCTION_USAGE (call) = use;
17489 /* Clear stack slot assignments remembered from previous functions.
17490 This is called from INIT_EXPANDERS once before RTL is emitted for each
17493 static struct machine_function *
17494 ix86_init_machine_status (void)
17496 struct machine_function *f;
17498 f = GGC_CNEW (struct machine_function);
17499 f->use_fast_prologue_epilogue_nregs = -1;
17500 f->tls_descriptor_call_expanded_p = 0;
17501 f->call_abi = DEFAULT_ABI;
17506 /* Return a MEM corresponding to a stack slot with mode MODE.
17507 Allocate a new slot if necessary.
17509 The RTL for a function can have several slots available: N is
17510 which slot to use. */
17513 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
17515 struct stack_local_entry *s;
17517 gcc_assert (n < MAX_386_STACK_LOCALS);
17519 /* Virtual slot is valid only before vregs are instantiated. */
17520 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
17522 for (s = ix86_stack_locals; s; s = s->next)
17523 if (s->mode == mode && s->n == n)
17524 return copy_rtx (s->rtl);
17526 s = (struct stack_local_entry *)
17527 ggc_alloc (sizeof (struct stack_local_entry));
17530 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17532 s->next = ix86_stack_locals;
17533 ix86_stack_locals = s;
17537 /* Construct the SYMBOL_REF for the tls_get_addr function. */
17539 static GTY(()) rtx ix86_tls_symbol;
17541 ix86_tls_get_addr (void)
17544 if (!ix86_tls_symbol)
17546 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
17547 (TARGET_ANY_GNU_TLS
17549 ? "___tls_get_addr"
17550 : "__tls_get_addr");
17553 return ix86_tls_symbol;
17556 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
17558 static GTY(()) rtx ix86_tls_module_base_symbol;
17560 ix86_tls_module_base (void)
17563 if (!ix86_tls_module_base_symbol)
17565 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
17566 "_TLS_MODULE_BASE_");
17567 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
17568 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
17571 return ix86_tls_module_base_symbol;
17574 /* Calculate the length of the memory address in the instruction
17575 encoding. Does not include the one-byte modrm, opcode, or prefix. */
17578 memory_address_length (rtx addr)
17580 struct ix86_address parts;
17581 rtx base, index, disp;
17585 if (GET_CODE (addr) == PRE_DEC
17586 || GET_CODE (addr) == POST_INC
17587 || GET_CODE (addr) == PRE_MODIFY
17588 || GET_CODE (addr) == POST_MODIFY)
17591 ok = ix86_decompose_address (addr, &parts);
17594 if (parts.base && GET_CODE (parts.base) == SUBREG)
17595 parts.base = SUBREG_REG (parts.base);
17596 if (parts.index && GET_CODE (parts.index) == SUBREG)
17597 parts.index = SUBREG_REG (parts.index);
17600 index = parts.index;
17605 - esp as the base always wants an index,
17606 - ebp as the base always wants a displacement. */
17608 /* Register Indirect. */
17609 if (base && !index && !disp)
17611 /* esp (for its index) and ebp (for its displacement) need
17612 the two-byte modrm form. */
17613 if (addr == stack_pointer_rtx
17614 || addr == arg_pointer_rtx
17615 || addr == frame_pointer_rtx
17616 || addr == hard_frame_pointer_rtx)
17620 /* Direct Addressing. */
17621 else if (disp && !base && !index)
17626 /* Find the length of the displacement constant. */
17629 if (base && satisfies_constraint_K (disp))
17634 /* ebp always wants a displacement. */
17635 else if (base == hard_frame_pointer_rtx)
17638 /* An index requires the two-byte modrm form.... */
17640 /* ...like esp, which always wants an index. */
17641 || base == stack_pointer_rtx
17642 || base == arg_pointer_rtx
17643 || base == frame_pointer_rtx)
17650 /* Compute default value for "length_immediate" attribute. When SHORTFORM
17651 is set, expect that insn have 8bit immediate alternative. */
17653 ix86_attr_length_immediate_default (rtx insn, int shortform)
17657 extract_insn_cached (insn);
17658 for (i = recog_data.n_operands - 1; i >= 0; --i)
17659 if (CONSTANT_P (recog_data.operand[i]))
17662 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
17666 switch (get_attr_mode (insn))
17677 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
17682 fatal_insn ("unknown insn mode", insn);
17688 /* Compute default value for "length_address" attribute. */
17690 ix86_attr_length_address_default (rtx insn)
17694 if (get_attr_type (insn) == TYPE_LEA)
17696 rtx set = PATTERN (insn);
17698 if (GET_CODE (set) == PARALLEL)
17699 set = XVECEXP (set, 0, 0);
17701 gcc_assert (GET_CODE (set) == SET);
17703 return memory_address_length (SET_SRC (set));
17706 extract_insn_cached (insn);
17707 for (i = recog_data.n_operands - 1; i >= 0; --i)
17708 if (MEM_P (recog_data.operand[i]))
17710 return memory_address_length (XEXP (recog_data.operand[i], 0));
17716 /* Return the maximum number of instructions a cpu can issue. */
17719 ix86_issue_rate (void)
17723 case PROCESSOR_PENTIUM:
17727 case PROCESSOR_PENTIUMPRO:
17728 case PROCESSOR_PENTIUM4:
17729 case PROCESSOR_ATHLON:
17731 case PROCESSOR_AMDFAM10:
17732 case PROCESSOR_NOCONA:
17733 case PROCESSOR_GENERIC32:
17734 case PROCESSOR_GENERIC64:
17737 case PROCESSOR_CORE2:
17745 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
17746 by DEP_INSN and nothing set by DEP_INSN. */
17749 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
17753 /* Simplify the test for uninteresting insns. */
17754 if (insn_type != TYPE_SETCC
17755 && insn_type != TYPE_ICMOV
17756 && insn_type != TYPE_FCMOV
17757 && insn_type != TYPE_IBR)
17760 if ((set = single_set (dep_insn)) != 0)
17762 set = SET_DEST (set);
17765 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
17766 && XVECLEN (PATTERN (dep_insn), 0) == 2
17767 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
17768 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
17770 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
17771 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
17776 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
17779 /* This test is true if the dependent insn reads the flags but
17780 not any other potentially set register. */
17781 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
17784 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
17790 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
17791 address with operands set by DEP_INSN. */
17794 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
17798 if (insn_type == TYPE_LEA
17801 addr = PATTERN (insn);
17803 if (GET_CODE (addr) == PARALLEL)
17804 addr = XVECEXP (addr, 0, 0);
17806 gcc_assert (GET_CODE (addr) == SET);
17808 addr = SET_SRC (addr);
17813 extract_insn_cached (insn);
17814 for (i = recog_data.n_operands - 1; i >= 0; --i)
17815 if (MEM_P (recog_data.operand[i]))
17817 addr = XEXP (recog_data.operand[i], 0);
17824 return modified_in_p (addr, dep_insn);
17828 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
17830 enum attr_type insn_type, dep_insn_type;
17831 enum attr_memory memory;
17833 int dep_insn_code_number;
17835 /* Anti and output dependencies have zero cost on all CPUs. */
17836 if (REG_NOTE_KIND (link) != 0)
17839 dep_insn_code_number = recog_memoized (dep_insn);
17841 /* If we can't recognize the insns, we can't really do anything. */
17842 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
17845 insn_type = get_attr_type (insn);
17846 dep_insn_type = get_attr_type (dep_insn);
17850 case PROCESSOR_PENTIUM:
17851 /* Address Generation Interlock adds a cycle of latency. */
17852 if (ix86_agi_dependent (insn, dep_insn, insn_type))
17855 /* ??? Compares pair with jump/setcc. */
17856 if (ix86_flags_dependent (insn, dep_insn, insn_type))
17859 /* Floating point stores require value to be ready one cycle earlier. */
17860 if (insn_type == TYPE_FMOV
17861 && get_attr_memory (insn) == MEMORY_STORE
17862 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17866 case PROCESSOR_PENTIUMPRO:
17867 memory = get_attr_memory (insn);
17869 /* INT->FP conversion is expensive. */
17870 if (get_attr_fp_int_src (dep_insn))
17873 /* There is one cycle extra latency between an FP op and a store. */
17874 if (insn_type == TYPE_FMOV
17875 && (set = single_set (dep_insn)) != NULL_RTX
17876 && (set2 = single_set (insn)) != NULL_RTX
17877 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
17878 && MEM_P (SET_DEST (set2)))
17881 /* Show ability of reorder buffer to hide latency of load by executing
17882 in parallel with previous instruction in case
17883 previous instruction is not needed to compute the address. */
17884 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17885 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17887 /* Claim moves to take one cycle, as core can issue one load
17888 at time and the next load can start cycle later. */
17889 if (dep_insn_type == TYPE_IMOV
17890 || dep_insn_type == TYPE_FMOV)
17898 memory = get_attr_memory (insn);
17900 /* The esp dependency is resolved before the instruction is really
17902 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
17903 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
17906 /* INT->FP conversion is expensive. */
17907 if (get_attr_fp_int_src (dep_insn))
17910 /* Show ability of reorder buffer to hide latency of load by executing
17911 in parallel with previous instruction in case
17912 previous instruction is not needed to compute the address. */
17913 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17914 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17916 /* Claim moves to take one cycle, as core can issue one load
17917 at time and the next load can start cycle later. */
17918 if (dep_insn_type == TYPE_IMOV
17919 || dep_insn_type == TYPE_FMOV)
17928 case PROCESSOR_ATHLON:
17930 case PROCESSOR_AMDFAM10:
17931 case PROCESSOR_GENERIC32:
17932 case PROCESSOR_GENERIC64:
17933 memory = get_attr_memory (insn);
17935 /* Show ability of reorder buffer to hide latency of load by executing
17936 in parallel with previous instruction in case
17937 previous instruction is not needed to compute the address. */
17938 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17939 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17941 enum attr_unit unit = get_attr_unit (insn);
17944 /* Because of the difference between the length of integer and
17945 floating unit pipeline preparation stages, the memory operands
17946 for floating point are cheaper.
17948 ??? For Athlon it the difference is most probably 2. */
17949 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17952 loadcost = TARGET_ATHLON ? 2 : 0;
17954 if (cost >= loadcost)
17967 /* How many alternative schedules to try. This should be as wide as the
17968 scheduling freedom in the DFA, but no wider. Making this value too
17969 large results extra work for the scheduler. */
17972 ia32_multipass_dfa_lookahead (void)
17976 case PROCESSOR_PENTIUM:
17979 case PROCESSOR_PENTIUMPRO:
17989 /* Compute the alignment given to a constant that is being placed in memory.
17990 EXP is the constant and ALIGN is the alignment that the object would
17992 The value of this function is used instead of that alignment to align
17996 ix86_constant_alignment (tree exp, int align)
17998 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17999 || TREE_CODE (exp) == INTEGER_CST)
18001 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18003 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18006 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18007 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18008 return BITS_PER_WORD;
18013 /* Compute the alignment for a static variable.
18014 TYPE is the data type, and ALIGN is the alignment that
18015 the object would ordinarily have. The value of this function is used
18016 instead of that alignment to align the object. */
18019 ix86_data_alignment (tree type, int align)
18021 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
18023 if (AGGREGATE_TYPE_P (type)
18024 && TYPE_SIZE (type)
18025 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18026 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18027 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18028 && align < max_align)
18031 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18032 to 16byte boundary. */
18035 if (AGGREGATE_TYPE_P (type)
18036 && TYPE_SIZE (type)
18037 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18038 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18039 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18043 if (TREE_CODE (type) == ARRAY_TYPE)
18045 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18047 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18050 else if (TREE_CODE (type) == COMPLEX_TYPE)
18053 if (TYPE_MODE (type) == DCmode && align < 64)
18055 if ((TYPE_MODE (type) == XCmode
18056 || TYPE_MODE (type) == TCmode) && align < 128)
18059 else if ((TREE_CODE (type) == RECORD_TYPE
18060 || TREE_CODE (type) == UNION_TYPE
18061 || TREE_CODE (type) == QUAL_UNION_TYPE)
18062 && TYPE_FIELDS (type))
18064 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18066 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18069 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18070 || TREE_CODE (type) == INTEGER_TYPE)
18072 if (TYPE_MODE (type) == DFmode && align < 64)
18074 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18081 /* Compute the alignment for a local variable or a stack slot. TYPE is
18082 the data type, MODE is the widest mode available and ALIGN is the
18083 alignment that the object would ordinarily have. The value of this
18084 macro is used instead of that alignment to align the object. */
18087 ix86_local_alignment (tree type, enum machine_mode mode,
18088 unsigned int align)
18090 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18091 register in MODE. We will return the largest alignment of XF
18095 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18096 align = GET_MODE_ALIGNMENT (DFmode);
18100 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18101 to 16byte boundary. */
18104 if (AGGREGATE_TYPE_P (type)
18105 && TYPE_SIZE (type)
18106 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18107 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18108 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
18111 if (TREE_CODE (type) == ARRAY_TYPE)
18113 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18115 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18118 else if (TREE_CODE (type) == COMPLEX_TYPE)
18120 if (TYPE_MODE (type) == DCmode && align < 64)
18122 if ((TYPE_MODE (type) == XCmode
18123 || TYPE_MODE (type) == TCmode) && align < 128)
18126 else if ((TREE_CODE (type) == RECORD_TYPE
18127 || TREE_CODE (type) == UNION_TYPE
18128 || TREE_CODE (type) == QUAL_UNION_TYPE)
18129 && TYPE_FIELDS (type))
18131 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18133 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18136 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18137 || TREE_CODE (type) == INTEGER_TYPE)
18140 if (TYPE_MODE (type) == DFmode && align < 64)
18142 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18148 /* Emit RTL insns to initialize the variable parts of a trampoline.
18149 FNADDR is an RTX for the address of the function's pure code.
18150 CXT is an RTX for the static chain value for the function. */
18152 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18156 /* Compute offset from the end of the jmp to the target function. */
18157 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18158 plus_constant (tramp, 10),
18159 NULL_RTX, 1, OPTAB_DIRECT);
18160 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18161 gen_int_mode (0xb9, QImode));
18162 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18163 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18164 gen_int_mode (0xe9, QImode));
18165 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18170 /* Try to load address using shorter movl instead of movabs.
18171 We may want to support movq for kernel mode, but kernel does not use
18172 trampolines at the moment. */
18173 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18175 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18176 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18177 gen_int_mode (0xbb41, HImode));
18178 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18179 gen_lowpart (SImode, fnaddr));
18184 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18185 gen_int_mode (0xbb49, HImode));
18186 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18190 /* Load static chain using movabs to r10. */
18191 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18192 gen_int_mode (0xba49, HImode));
18193 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18196 /* Jump to the r11 */
18197 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18198 gen_int_mode (0xff49, HImode));
18199 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18200 gen_int_mode (0xe3, QImode));
18202 gcc_assert (offset <= TRAMPOLINE_SIZE);
18205 #ifdef ENABLE_EXECUTE_STACK
18206 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18207 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18211 /* Codes for all the SSE/MMX builtins. */
18214 IX86_BUILTIN_ADDPS,
18215 IX86_BUILTIN_ADDSS,
18216 IX86_BUILTIN_DIVPS,
18217 IX86_BUILTIN_DIVSS,
18218 IX86_BUILTIN_MULPS,
18219 IX86_BUILTIN_MULSS,
18220 IX86_BUILTIN_SUBPS,
18221 IX86_BUILTIN_SUBSS,
18223 IX86_BUILTIN_CMPEQPS,
18224 IX86_BUILTIN_CMPLTPS,
18225 IX86_BUILTIN_CMPLEPS,
18226 IX86_BUILTIN_CMPGTPS,
18227 IX86_BUILTIN_CMPGEPS,
18228 IX86_BUILTIN_CMPNEQPS,
18229 IX86_BUILTIN_CMPNLTPS,
18230 IX86_BUILTIN_CMPNLEPS,
18231 IX86_BUILTIN_CMPNGTPS,
18232 IX86_BUILTIN_CMPNGEPS,
18233 IX86_BUILTIN_CMPORDPS,
18234 IX86_BUILTIN_CMPUNORDPS,
18235 IX86_BUILTIN_CMPEQSS,
18236 IX86_BUILTIN_CMPLTSS,
18237 IX86_BUILTIN_CMPLESS,
18238 IX86_BUILTIN_CMPNEQSS,
18239 IX86_BUILTIN_CMPNLTSS,
18240 IX86_BUILTIN_CMPNLESS,
18241 IX86_BUILTIN_CMPNGTSS,
18242 IX86_BUILTIN_CMPNGESS,
18243 IX86_BUILTIN_CMPORDSS,
18244 IX86_BUILTIN_CMPUNORDSS,
18246 IX86_BUILTIN_COMIEQSS,
18247 IX86_BUILTIN_COMILTSS,
18248 IX86_BUILTIN_COMILESS,
18249 IX86_BUILTIN_COMIGTSS,
18250 IX86_BUILTIN_COMIGESS,
18251 IX86_BUILTIN_COMINEQSS,
18252 IX86_BUILTIN_UCOMIEQSS,
18253 IX86_BUILTIN_UCOMILTSS,
18254 IX86_BUILTIN_UCOMILESS,
18255 IX86_BUILTIN_UCOMIGTSS,
18256 IX86_BUILTIN_UCOMIGESS,
18257 IX86_BUILTIN_UCOMINEQSS,
18259 IX86_BUILTIN_CVTPI2PS,
18260 IX86_BUILTIN_CVTPS2PI,
18261 IX86_BUILTIN_CVTSI2SS,
18262 IX86_BUILTIN_CVTSI642SS,
18263 IX86_BUILTIN_CVTSS2SI,
18264 IX86_BUILTIN_CVTSS2SI64,
18265 IX86_BUILTIN_CVTTPS2PI,
18266 IX86_BUILTIN_CVTTSS2SI,
18267 IX86_BUILTIN_CVTTSS2SI64,
18269 IX86_BUILTIN_MAXPS,
18270 IX86_BUILTIN_MAXSS,
18271 IX86_BUILTIN_MINPS,
18272 IX86_BUILTIN_MINSS,
18274 IX86_BUILTIN_LOADUPS,
18275 IX86_BUILTIN_STOREUPS,
18276 IX86_BUILTIN_MOVSS,
18278 IX86_BUILTIN_MOVHLPS,
18279 IX86_BUILTIN_MOVLHPS,
18280 IX86_BUILTIN_LOADHPS,
18281 IX86_BUILTIN_LOADLPS,
18282 IX86_BUILTIN_STOREHPS,
18283 IX86_BUILTIN_STORELPS,
18285 IX86_BUILTIN_MASKMOVQ,
18286 IX86_BUILTIN_MOVMSKPS,
18287 IX86_BUILTIN_PMOVMSKB,
18289 IX86_BUILTIN_MOVNTPS,
18290 IX86_BUILTIN_MOVNTQ,
18292 IX86_BUILTIN_LOADDQU,
18293 IX86_BUILTIN_STOREDQU,
18295 IX86_BUILTIN_PACKSSWB,
18296 IX86_BUILTIN_PACKSSDW,
18297 IX86_BUILTIN_PACKUSWB,
18299 IX86_BUILTIN_PADDB,
18300 IX86_BUILTIN_PADDW,
18301 IX86_BUILTIN_PADDD,
18302 IX86_BUILTIN_PADDQ,
18303 IX86_BUILTIN_PADDSB,
18304 IX86_BUILTIN_PADDSW,
18305 IX86_BUILTIN_PADDUSB,
18306 IX86_BUILTIN_PADDUSW,
18307 IX86_BUILTIN_PSUBB,
18308 IX86_BUILTIN_PSUBW,
18309 IX86_BUILTIN_PSUBD,
18310 IX86_BUILTIN_PSUBQ,
18311 IX86_BUILTIN_PSUBSB,
18312 IX86_BUILTIN_PSUBSW,
18313 IX86_BUILTIN_PSUBUSB,
18314 IX86_BUILTIN_PSUBUSW,
18317 IX86_BUILTIN_PANDN,
18321 IX86_BUILTIN_PAVGB,
18322 IX86_BUILTIN_PAVGW,
18324 IX86_BUILTIN_PCMPEQB,
18325 IX86_BUILTIN_PCMPEQW,
18326 IX86_BUILTIN_PCMPEQD,
18327 IX86_BUILTIN_PCMPGTB,
18328 IX86_BUILTIN_PCMPGTW,
18329 IX86_BUILTIN_PCMPGTD,
18331 IX86_BUILTIN_PMADDWD,
18333 IX86_BUILTIN_PMAXSW,
18334 IX86_BUILTIN_PMAXUB,
18335 IX86_BUILTIN_PMINSW,
18336 IX86_BUILTIN_PMINUB,
18338 IX86_BUILTIN_PMULHUW,
18339 IX86_BUILTIN_PMULHW,
18340 IX86_BUILTIN_PMULLW,
18342 IX86_BUILTIN_PSADBW,
18343 IX86_BUILTIN_PSHUFW,
18345 IX86_BUILTIN_PSLLW,
18346 IX86_BUILTIN_PSLLD,
18347 IX86_BUILTIN_PSLLQ,
18348 IX86_BUILTIN_PSRAW,
18349 IX86_BUILTIN_PSRAD,
18350 IX86_BUILTIN_PSRLW,
18351 IX86_BUILTIN_PSRLD,
18352 IX86_BUILTIN_PSRLQ,
18353 IX86_BUILTIN_PSLLWI,
18354 IX86_BUILTIN_PSLLDI,
18355 IX86_BUILTIN_PSLLQI,
18356 IX86_BUILTIN_PSRAWI,
18357 IX86_BUILTIN_PSRADI,
18358 IX86_BUILTIN_PSRLWI,
18359 IX86_BUILTIN_PSRLDI,
18360 IX86_BUILTIN_PSRLQI,
18362 IX86_BUILTIN_PUNPCKHBW,
18363 IX86_BUILTIN_PUNPCKHWD,
18364 IX86_BUILTIN_PUNPCKHDQ,
18365 IX86_BUILTIN_PUNPCKLBW,
18366 IX86_BUILTIN_PUNPCKLWD,
18367 IX86_BUILTIN_PUNPCKLDQ,
18369 IX86_BUILTIN_SHUFPS,
18371 IX86_BUILTIN_RCPPS,
18372 IX86_BUILTIN_RCPSS,
18373 IX86_BUILTIN_RSQRTPS,
18374 IX86_BUILTIN_RSQRTPS_NR,
18375 IX86_BUILTIN_RSQRTSS,
18376 IX86_BUILTIN_RSQRTF,
18377 IX86_BUILTIN_SQRTPS,
18378 IX86_BUILTIN_SQRTPS_NR,
18379 IX86_BUILTIN_SQRTSS,
18381 IX86_BUILTIN_UNPCKHPS,
18382 IX86_BUILTIN_UNPCKLPS,
18384 IX86_BUILTIN_ANDPS,
18385 IX86_BUILTIN_ANDNPS,
18387 IX86_BUILTIN_XORPS,
18390 IX86_BUILTIN_LDMXCSR,
18391 IX86_BUILTIN_STMXCSR,
18392 IX86_BUILTIN_SFENCE,
18394 /* 3DNow! Original */
18395 IX86_BUILTIN_FEMMS,
18396 IX86_BUILTIN_PAVGUSB,
18397 IX86_BUILTIN_PF2ID,
18398 IX86_BUILTIN_PFACC,
18399 IX86_BUILTIN_PFADD,
18400 IX86_BUILTIN_PFCMPEQ,
18401 IX86_BUILTIN_PFCMPGE,
18402 IX86_BUILTIN_PFCMPGT,
18403 IX86_BUILTIN_PFMAX,
18404 IX86_BUILTIN_PFMIN,
18405 IX86_BUILTIN_PFMUL,
18406 IX86_BUILTIN_PFRCP,
18407 IX86_BUILTIN_PFRCPIT1,
18408 IX86_BUILTIN_PFRCPIT2,
18409 IX86_BUILTIN_PFRSQIT1,
18410 IX86_BUILTIN_PFRSQRT,
18411 IX86_BUILTIN_PFSUB,
18412 IX86_BUILTIN_PFSUBR,
18413 IX86_BUILTIN_PI2FD,
18414 IX86_BUILTIN_PMULHRW,
18416 /* 3DNow! Athlon Extensions */
18417 IX86_BUILTIN_PF2IW,
18418 IX86_BUILTIN_PFNACC,
18419 IX86_BUILTIN_PFPNACC,
18420 IX86_BUILTIN_PI2FW,
18421 IX86_BUILTIN_PSWAPDSI,
18422 IX86_BUILTIN_PSWAPDSF,
18425 IX86_BUILTIN_ADDPD,
18426 IX86_BUILTIN_ADDSD,
18427 IX86_BUILTIN_DIVPD,
18428 IX86_BUILTIN_DIVSD,
18429 IX86_BUILTIN_MULPD,
18430 IX86_BUILTIN_MULSD,
18431 IX86_BUILTIN_SUBPD,
18432 IX86_BUILTIN_SUBSD,
18434 IX86_BUILTIN_CMPEQPD,
18435 IX86_BUILTIN_CMPLTPD,
18436 IX86_BUILTIN_CMPLEPD,
18437 IX86_BUILTIN_CMPGTPD,
18438 IX86_BUILTIN_CMPGEPD,
18439 IX86_BUILTIN_CMPNEQPD,
18440 IX86_BUILTIN_CMPNLTPD,
18441 IX86_BUILTIN_CMPNLEPD,
18442 IX86_BUILTIN_CMPNGTPD,
18443 IX86_BUILTIN_CMPNGEPD,
18444 IX86_BUILTIN_CMPORDPD,
18445 IX86_BUILTIN_CMPUNORDPD,
18446 IX86_BUILTIN_CMPEQSD,
18447 IX86_BUILTIN_CMPLTSD,
18448 IX86_BUILTIN_CMPLESD,
18449 IX86_BUILTIN_CMPNEQSD,
18450 IX86_BUILTIN_CMPNLTSD,
18451 IX86_BUILTIN_CMPNLESD,
18452 IX86_BUILTIN_CMPORDSD,
18453 IX86_BUILTIN_CMPUNORDSD,
18455 IX86_BUILTIN_COMIEQSD,
18456 IX86_BUILTIN_COMILTSD,
18457 IX86_BUILTIN_COMILESD,
18458 IX86_BUILTIN_COMIGTSD,
18459 IX86_BUILTIN_COMIGESD,
18460 IX86_BUILTIN_COMINEQSD,
18461 IX86_BUILTIN_UCOMIEQSD,
18462 IX86_BUILTIN_UCOMILTSD,
18463 IX86_BUILTIN_UCOMILESD,
18464 IX86_BUILTIN_UCOMIGTSD,
18465 IX86_BUILTIN_UCOMIGESD,
18466 IX86_BUILTIN_UCOMINEQSD,
18468 IX86_BUILTIN_MAXPD,
18469 IX86_BUILTIN_MAXSD,
18470 IX86_BUILTIN_MINPD,
18471 IX86_BUILTIN_MINSD,
18473 IX86_BUILTIN_ANDPD,
18474 IX86_BUILTIN_ANDNPD,
18476 IX86_BUILTIN_XORPD,
18478 IX86_BUILTIN_SQRTPD,
18479 IX86_BUILTIN_SQRTSD,
18481 IX86_BUILTIN_UNPCKHPD,
18482 IX86_BUILTIN_UNPCKLPD,
18484 IX86_BUILTIN_SHUFPD,
18486 IX86_BUILTIN_LOADUPD,
18487 IX86_BUILTIN_STOREUPD,
18488 IX86_BUILTIN_MOVSD,
18490 IX86_BUILTIN_LOADHPD,
18491 IX86_BUILTIN_LOADLPD,
18493 IX86_BUILTIN_CVTDQ2PD,
18494 IX86_BUILTIN_CVTDQ2PS,
18496 IX86_BUILTIN_CVTPD2DQ,
18497 IX86_BUILTIN_CVTPD2PI,
18498 IX86_BUILTIN_CVTPD2PS,
18499 IX86_BUILTIN_CVTTPD2DQ,
18500 IX86_BUILTIN_CVTTPD2PI,
18502 IX86_BUILTIN_CVTPI2PD,
18503 IX86_BUILTIN_CVTSI2SD,
18504 IX86_BUILTIN_CVTSI642SD,
18506 IX86_BUILTIN_CVTSD2SI,
18507 IX86_BUILTIN_CVTSD2SI64,
18508 IX86_BUILTIN_CVTSD2SS,
18509 IX86_BUILTIN_CVTSS2SD,
18510 IX86_BUILTIN_CVTTSD2SI,
18511 IX86_BUILTIN_CVTTSD2SI64,
18513 IX86_BUILTIN_CVTPS2DQ,
18514 IX86_BUILTIN_CVTPS2PD,
18515 IX86_BUILTIN_CVTTPS2DQ,
18517 IX86_BUILTIN_MOVNTI,
18518 IX86_BUILTIN_MOVNTPD,
18519 IX86_BUILTIN_MOVNTDQ,
18522 IX86_BUILTIN_MASKMOVDQU,
18523 IX86_BUILTIN_MOVMSKPD,
18524 IX86_BUILTIN_PMOVMSKB128,
18526 IX86_BUILTIN_PACKSSWB128,
18527 IX86_BUILTIN_PACKSSDW128,
18528 IX86_BUILTIN_PACKUSWB128,
18530 IX86_BUILTIN_PADDB128,
18531 IX86_BUILTIN_PADDW128,
18532 IX86_BUILTIN_PADDD128,
18533 IX86_BUILTIN_PADDQ128,
18534 IX86_BUILTIN_PADDSB128,
18535 IX86_BUILTIN_PADDSW128,
18536 IX86_BUILTIN_PADDUSB128,
18537 IX86_BUILTIN_PADDUSW128,
18538 IX86_BUILTIN_PSUBB128,
18539 IX86_BUILTIN_PSUBW128,
18540 IX86_BUILTIN_PSUBD128,
18541 IX86_BUILTIN_PSUBQ128,
18542 IX86_BUILTIN_PSUBSB128,
18543 IX86_BUILTIN_PSUBSW128,
18544 IX86_BUILTIN_PSUBUSB128,
18545 IX86_BUILTIN_PSUBUSW128,
18547 IX86_BUILTIN_PAND128,
18548 IX86_BUILTIN_PANDN128,
18549 IX86_BUILTIN_POR128,
18550 IX86_BUILTIN_PXOR128,
18552 IX86_BUILTIN_PAVGB128,
18553 IX86_BUILTIN_PAVGW128,
18555 IX86_BUILTIN_PCMPEQB128,
18556 IX86_BUILTIN_PCMPEQW128,
18557 IX86_BUILTIN_PCMPEQD128,
18558 IX86_BUILTIN_PCMPGTB128,
18559 IX86_BUILTIN_PCMPGTW128,
18560 IX86_BUILTIN_PCMPGTD128,
18562 IX86_BUILTIN_PMADDWD128,
18564 IX86_BUILTIN_PMAXSW128,
18565 IX86_BUILTIN_PMAXUB128,
18566 IX86_BUILTIN_PMINSW128,
18567 IX86_BUILTIN_PMINUB128,
18569 IX86_BUILTIN_PMULUDQ,
18570 IX86_BUILTIN_PMULUDQ128,
18571 IX86_BUILTIN_PMULHUW128,
18572 IX86_BUILTIN_PMULHW128,
18573 IX86_BUILTIN_PMULLW128,
18575 IX86_BUILTIN_PSADBW128,
18576 IX86_BUILTIN_PSHUFHW,
18577 IX86_BUILTIN_PSHUFLW,
18578 IX86_BUILTIN_PSHUFD,
18580 IX86_BUILTIN_PSLLDQI128,
18581 IX86_BUILTIN_PSLLWI128,
18582 IX86_BUILTIN_PSLLDI128,
18583 IX86_BUILTIN_PSLLQI128,
18584 IX86_BUILTIN_PSRAWI128,
18585 IX86_BUILTIN_PSRADI128,
18586 IX86_BUILTIN_PSRLDQI128,
18587 IX86_BUILTIN_PSRLWI128,
18588 IX86_BUILTIN_PSRLDI128,
18589 IX86_BUILTIN_PSRLQI128,
18591 IX86_BUILTIN_PSLLDQ128,
18592 IX86_BUILTIN_PSLLW128,
18593 IX86_BUILTIN_PSLLD128,
18594 IX86_BUILTIN_PSLLQ128,
18595 IX86_BUILTIN_PSRAW128,
18596 IX86_BUILTIN_PSRAD128,
18597 IX86_BUILTIN_PSRLW128,
18598 IX86_BUILTIN_PSRLD128,
18599 IX86_BUILTIN_PSRLQ128,
18601 IX86_BUILTIN_PUNPCKHBW128,
18602 IX86_BUILTIN_PUNPCKHWD128,
18603 IX86_BUILTIN_PUNPCKHDQ128,
18604 IX86_BUILTIN_PUNPCKHQDQ128,
18605 IX86_BUILTIN_PUNPCKLBW128,
18606 IX86_BUILTIN_PUNPCKLWD128,
18607 IX86_BUILTIN_PUNPCKLDQ128,
18608 IX86_BUILTIN_PUNPCKLQDQ128,
18610 IX86_BUILTIN_CLFLUSH,
18611 IX86_BUILTIN_MFENCE,
18612 IX86_BUILTIN_LFENCE,
18615 IX86_BUILTIN_ADDSUBPS,
18616 IX86_BUILTIN_HADDPS,
18617 IX86_BUILTIN_HSUBPS,
18618 IX86_BUILTIN_MOVSHDUP,
18619 IX86_BUILTIN_MOVSLDUP,
18620 IX86_BUILTIN_ADDSUBPD,
18621 IX86_BUILTIN_HADDPD,
18622 IX86_BUILTIN_HSUBPD,
18623 IX86_BUILTIN_LDDQU,
18625 IX86_BUILTIN_MONITOR,
18626 IX86_BUILTIN_MWAIT,
18629 IX86_BUILTIN_PHADDW,
18630 IX86_BUILTIN_PHADDD,
18631 IX86_BUILTIN_PHADDSW,
18632 IX86_BUILTIN_PHSUBW,
18633 IX86_BUILTIN_PHSUBD,
18634 IX86_BUILTIN_PHSUBSW,
18635 IX86_BUILTIN_PMADDUBSW,
18636 IX86_BUILTIN_PMULHRSW,
18637 IX86_BUILTIN_PSHUFB,
18638 IX86_BUILTIN_PSIGNB,
18639 IX86_BUILTIN_PSIGNW,
18640 IX86_BUILTIN_PSIGND,
18641 IX86_BUILTIN_PALIGNR,
18642 IX86_BUILTIN_PABSB,
18643 IX86_BUILTIN_PABSW,
18644 IX86_BUILTIN_PABSD,
18646 IX86_BUILTIN_PHADDW128,
18647 IX86_BUILTIN_PHADDD128,
18648 IX86_BUILTIN_PHADDSW128,
18649 IX86_BUILTIN_PHSUBW128,
18650 IX86_BUILTIN_PHSUBD128,
18651 IX86_BUILTIN_PHSUBSW128,
18652 IX86_BUILTIN_PMADDUBSW128,
18653 IX86_BUILTIN_PMULHRSW128,
18654 IX86_BUILTIN_PSHUFB128,
18655 IX86_BUILTIN_PSIGNB128,
18656 IX86_BUILTIN_PSIGNW128,
18657 IX86_BUILTIN_PSIGND128,
18658 IX86_BUILTIN_PALIGNR128,
18659 IX86_BUILTIN_PABSB128,
18660 IX86_BUILTIN_PABSW128,
18661 IX86_BUILTIN_PABSD128,
18663 /* AMDFAM10 - SSE4A New Instructions. */
18664 IX86_BUILTIN_MOVNTSD,
18665 IX86_BUILTIN_MOVNTSS,
18666 IX86_BUILTIN_EXTRQI,
18667 IX86_BUILTIN_EXTRQ,
18668 IX86_BUILTIN_INSERTQI,
18669 IX86_BUILTIN_INSERTQ,
18672 IX86_BUILTIN_BLENDPD,
18673 IX86_BUILTIN_BLENDPS,
18674 IX86_BUILTIN_BLENDVPD,
18675 IX86_BUILTIN_BLENDVPS,
18676 IX86_BUILTIN_PBLENDVB128,
18677 IX86_BUILTIN_PBLENDW128,
18682 IX86_BUILTIN_INSERTPS128,
18684 IX86_BUILTIN_MOVNTDQA,
18685 IX86_BUILTIN_MPSADBW128,
18686 IX86_BUILTIN_PACKUSDW128,
18687 IX86_BUILTIN_PCMPEQQ,
18688 IX86_BUILTIN_PHMINPOSUW128,
18690 IX86_BUILTIN_PMAXSB128,
18691 IX86_BUILTIN_PMAXSD128,
18692 IX86_BUILTIN_PMAXUD128,
18693 IX86_BUILTIN_PMAXUW128,
18695 IX86_BUILTIN_PMINSB128,
18696 IX86_BUILTIN_PMINSD128,
18697 IX86_BUILTIN_PMINUD128,
18698 IX86_BUILTIN_PMINUW128,
18700 IX86_BUILTIN_PMOVSXBW128,
18701 IX86_BUILTIN_PMOVSXBD128,
18702 IX86_BUILTIN_PMOVSXBQ128,
18703 IX86_BUILTIN_PMOVSXWD128,
18704 IX86_BUILTIN_PMOVSXWQ128,
18705 IX86_BUILTIN_PMOVSXDQ128,
18707 IX86_BUILTIN_PMOVZXBW128,
18708 IX86_BUILTIN_PMOVZXBD128,
18709 IX86_BUILTIN_PMOVZXBQ128,
18710 IX86_BUILTIN_PMOVZXWD128,
18711 IX86_BUILTIN_PMOVZXWQ128,
18712 IX86_BUILTIN_PMOVZXDQ128,
18714 IX86_BUILTIN_PMULDQ128,
18715 IX86_BUILTIN_PMULLD128,
18717 IX86_BUILTIN_ROUNDPD,
18718 IX86_BUILTIN_ROUNDPS,
18719 IX86_BUILTIN_ROUNDSD,
18720 IX86_BUILTIN_ROUNDSS,
18722 IX86_BUILTIN_PTESTZ,
18723 IX86_BUILTIN_PTESTC,
18724 IX86_BUILTIN_PTESTNZC,
18726 IX86_BUILTIN_VEC_INIT_V2SI,
18727 IX86_BUILTIN_VEC_INIT_V4HI,
18728 IX86_BUILTIN_VEC_INIT_V8QI,
18729 IX86_BUILTIN_VEC_EXT_V2DF,
18730 IX86_BUILTIN_VEC_EXT_V2DI,
18731 IX86_BUILTIN_VEC_EXT_V4SF,
18732 IX86_BUILTIN_VEC_EXT_V4SI,
18733 IX86_BUILTIN_VEC_EXT_V8HI,
18734 IX86_BUILTIN_VEC_EXT_V2SI,
18735 IX86_BUILTIN_VEC_EXT_V4HI,
18736 IX86_BUILTIN_VEC_EXT_V16QI,
18737 IX86_BUILTIN_VEC_SET_V2DI,
18738 IX86_BUILTIN_VEC_SET_V4SF,
18739 IX86_BUILTIN_VEC_SET_V4SI,
18740 IX86_BUILTIN_VEC_SET_V8HI,
18741 IX86_BUILTIN_VEC_SET_V4HI,
18742 IX86_BUILTIN_VEC_SET_V16QI,
18744 IX86_BUILTIN_VEC_PACK_SFIX,
18747 IX86_BUILTIN_CRC32QI,
18748 IX86_BUILTIN_CRC32HI,
18749 IX86_BUILTIN_CRC32SI,
18750 IX86_BUILTIN_CRC32DI,
18752 IX86_BUILTIN_PCMPESTRI128,
18753 IX86_BUILTIN_PCMPESTRM128,
18754 IX86_BUILTIN_PCMPESTRA128,
18755 IX86_BUILTIN_PCMPESTRC128,
18756 IX86_BUILTIN_PCMPESTRO128,
18757 IX86_BUILTIN_PCMPESTRS128,
18758 IX86_BUILTIN_PCMPESTRZ128,
18759 IX86_BUILTIN_PCMPISTRI128,
18760 IX86_BUILTIN_PCMPISTRM128,
18761 IX86_BUILTIN_PCMPISTRA128,
18762 IX86_BUILTIN_PCMPISTRC128,
18763 IX86_BUILTIN_PCMPISTRO128,
18764 IX86_BUILTIN_PCMPISTRS128,
18765 IX86_BUILTIN_PCMPISTRZ128,
18767 IX86_BUILTIN_PCMPGTQ,
18769 /* AES instructions */
18770 IX86_BUILTIN_AESENC128,
18771 IX86_BUILTIN_AESENCLAST128,
18772 IX86_BUILTIN_AESDEC128,
18773 IX86_BUILTIN_AESDECLAST128,
18774 IX86_BUILTIN_AESIMC128,
18775 IX86_BUILTIN_AESKEYGENASSIST128,
18777 /* PCLMUL instruction */
18778 IX86_BUILTIN_PCLMULQDQ128,
18780 /* TFmode support builtins. */
18782 IX86_BUILTIN_FABSQ,
18783 IX86_BUILTIN_COPYSIGNQ,
18785 /* SSE5 instructions */
18786 IX86_BUILTIN_FMADDSS,
18787 IX86_BUILTIN_FMADDSD,
18788 IX86_BUILTIN_FMADDPS,
18789 IX86_BUILTIN_FMADDPD,
18790 IX86_BUILTIN_FMSUBSS,
18791 IX86_BUILTIN_FMSUBSD,
18792 IX86_BUILTIN_FMSUBPS,
18793 IX86_BUILTIN_FMSUBPD,
18794 IX86_BUILTIN_FNMADDSS,
18795 IX86_BUILTIN_FNMADDSD,
18796 IX86_BUILTIN_FNMADDPS,
18797 IX86_BUILTIN_FNMADDPD,
18798 IX86_BUILTIN_FNMSUBSS,
18799 IX86_BUILTIN_FNMSUBSD,
18800 IX86_BUILTIN_FNMSUBPS,
18801 IX86_BUILTIN_FNMSUBPD,
18802 IX86_BUILTIN_PCMOV_V2DI,
18803 IX86_BUILTIN_PCMOV_V4SI,
18804 IX86_BUILTIN_PCMOV_V8HI,
18805 IX86_BUILTIN_PCMOV_V16QI,
18806 IX86_BUILTIN_PCMOV_V4SF,
18807 IX86_BUILTIN_PCMOV_V2DF,
18808 IX86_BUILTIN_PPERM,
18809 IX86_BUILTIN_PERMPS,
18810 IX86_BUILTIN_PERMPD,
18811 IX86_BUILTIN_PMACSSWW,
18812 IX86_BUILTIN_PMACSWW,
18813 IX86_BUILTIN_PMACSSWD,
18814 IX86_BUILTIN_PMACSWD,
18815 IX86_BUILTIN_PMACSSDD,
18816 IX86_BUILTIN_PMACSDD,
18817 IX86_BUILTIN_PMACSSDQL,
18818 IX86_BUILTIN_PMACSSDQH,
18819 IX86_BUILTIN_PMACSDQL,
18820 IX86_BUILTIN_PMACSDQH,
18821 IX86_BUILTIN_PMADCSSWD,
18822 IX86_BUILTIN_PMADCSWD,
18823 IX86_BUILTIN_PHADDBW,
18824 IX86_BUILTIN_PHADDBD,
18825 IX86_BUILTIN_PHADDBQ,
18826 IX86_BUILTIN_PHADDWD,
18827 IX86_BUILTIN_PHADDWQ,
18828 IX86_BUILTIN_PHADDDQ,
18829 IX86_BUILTIN_PHADDUBW,
18830 IX86_BUILTIN_PHADDUBD,
18831 IX86_BUILTIN_PHADDUBQ,
18832 IX86_BUILTIN_PHADDUWD,
18833 IX86_BUILTIN_PHADDUWQ,
18834 IX86_BUILTIN_PHADDUDQ,
18835 IX86_BUILTIN_PHSUBBW,
18836 IX86_BUILTIN_PHSUBWD,
18837 IX86_BUILTIN_PHSUBDQ,
18838 IX86_BUILTIN_PROTB,
18839 IX86_BUILTIN_PROTW,
18840 IX86_BUILTIN_PROTD,
18841 IX86_BUILTIN_PROTQ,
18842 IX86_BUILTIN_PROTB_IMM,
18843 IX86_BUILTIN_PROTW_IMM,
18844 IX86_BUILTIN_PROTD_IMM,
18845 IX86_BUILTIN_PROTQ_IMM,
18846 IX86_BUILTIN_PSHLB,
18847 IX86_BUILTIN_PSHLW,
18848 IX86_BUILTIN_PSHLD,
18849 IX86_BUILTIN_PSHLQ,
18850 IX86_BUILTIN_PSHAB,
18851 IX86_BUILTIN_PSHAW,
18852 IX86_BUILTIN_PSHAD,
18853 IX86_BUILTIN_PSHAQ,
18854 IX86_BUILTIN_FRCZSS,
18855 IX86_BUILTIN_FRCZSD,
18856 IX86_BUILTIN_FRCZPS,
18857 IX86_BUILTIN_FRCZPD,
18858 IX86_BUILTIN_CVTPH2PS,
18859 IX86_BUILTIN_CVTPS2PH,
18861 IX86_BUILTIN_COMEQSS,
18862 IX86_BUILTIN_COMNESS,
18863 IX86_BUILTIN_COMLTSS,
18864 IX86_BUILTIN_COMLESS,
18865 IX86_BUILTIN_COMGTSS,
18866 IX86_BUILTIN_COMGESS,
18867 IX86_BUILTIN_COMUEQSS,
18868 IX86_BUILTIN_COMUNESS,
18869 IX86_BUILTIN_COMULTSS,
18870 IX86_BUILTIN_COMULESS,
18871 IX86_BUILTIN_COMUGTSS,
18872 IX86_BUILTIN_COMUGESS,
18873 IX86_BUILTIN_COMORDSS,
18874 IX86_BUILTIN_COMUNORDSS,
18875 IX86_BUILTIN_COMFALSESS,
18876 IX86_BUILTIN_COMTRUESS,
18878 IX86_BUILTIN_COMEQSD,
18879 IX86_BUILTIN_COMNESD,
18880 IX86_BUILTIN_COMLTSD,
18881 IX86_BUILTIN_COMLESD,
18882 IX86_BUILTIN_COMGTSD,
18883 IX86_BUILTIN_COMGESD,
18884 IX86_BUILTIN_COMUEQSD,
18885 IX86_BUILTIN_COMUNESD,
18886 IX86_BUILTIN_COMULTSD,
18887 IX86_BUILTIN_COMULESD,
18888 IX86_BUILTIN_COMUGTSD,
18889 IX86_BUILTIN_COMUGESD,
18890 IX86_BUILTIN_COMORDSD,
18891 IX86_BUILTIN_COMUNORDSD,
18892 IX86_BUILTIN_COMFALSESD,
18893 IX86_BUILTIN_COMTRUESD,
18895 IX86_BUILTIN_COMEQPS,
18896 IX86_BUILTIN_COMNEPS,
18897 IX86_BUILTIN_COMLTPS,
18898 IX86_BUILTIN_COMLEPS,
18899 IX86_BUILTIN_COMGTPS,
18900 IX86_BUILTIN_COMGEPS,
18901 IX86_BUILTIN_COMUEQPS,
18902 IX86_BUILTIN_COMUNEPS,
18903 IX86_BUILTIN_COMULTPS,
18904 IX86_BUILTIN_COMULEPS,
18905 IX86_BUILTIN_COMUGTPS,
18906 IX86_BUILTIN_COMUGEPS,
18907 IX86_BUILTIN_COMORDPS,
18908 IX86_BUILTIN_COMUNORDPS,
18909 IX86_BUILTIN_COMFALSEPS,
18910 IX86_BUILTIN_COMTRUEPS,
18912 IX86_BUILTIN_COMEQPD,
18913 IX86_BUILTIN_COMNEPD,
18914 IX86_BUILTIN_COMLTPD,
18915 IX86_BUILTIN_COMLEPD,
18916 IX86_BUILTIN_COMGTPD,
18917 IX86_BUILTIN_COMGEPD,
18918 IX86_BUILTIN_COMUEQPD,
18919 IX86_BUILTIN_COMUNEPD,
18920 IX86_BUILTIN_COMULTPD,
18921 IX86_BUILTIN_COMULEPD,
18922 IX86_BUILTIN_COMUGTPD,
18923 IX86_BUILTIN_COMUGEPD,
18924 IX86_BUILTIN_COMORDPD,
18925 IX86_BUILTIN_COMUNORDPD,
18926 IX86_BUILTIN_COMFALSEPD,
18927 IX86_BUILTIN_COMTRUEPD,
18929 IX86_BUILTIN_PCOMEQUB,
18930 IX86_BUILTIN_PCOMNEUB,
18931 IX86_BUILTIN_PCOMLTUB,
18932 IX86_BUILTIN_PCOMLEUB,
18933 IX86_BUILTIN_PCOMGTUB,
18934 IX86_BUILTIN_PCOMGEUB,
18935 IX86_BUILTIN_PCOMFALSEUB,
18936 IX86_BUILTIN_PCOMTRUEUB,
18937 IX86_BUILTIN_PCOMEQUW,
18938 IX86_BUILTIN_PCOMNEUW,
18939 IX86_BUILTIN_PCOMLTUW,
18940 IX86_BUILTIN_PCOMLEUW,
18941 IX86_BUILTIN_PCOMGTUW,
18942 IX86_BUILTIN_PCOMGEUW,
18943 IX86_BUILTIN_PCOMFALSEUW,
18944 IX86_BUILTIN_PCOMTRUEUW,
18945 IX86_BUILTIN_PCOMEQUD,
18946 IX86_BUILTIN_PCOMNEUD,
18947 IX86_BUILTIN_PCOMLTUD,
18948 IX86_BUILTIN_PCOMLEUD,
18949 IX86_BUILTIN_PCOMGTUD,
18950 IX86_BUILTIN_PCOMGEUD,
18951 IX86_BUILTIN_PCOMFALSEUD,
18952 IX86_BUILTIN_PCOMTRUEUD,
18953 IX86_BUILTIN_PCOMEQUQ,
18954 IX86_BUILTIN_PCOMNEUQ,
18955 IX86_BUILTIN_PCOMLTUQ,
18956 IX86_BUILTIN_PCOMLEUQ,
18957 IX86_BUILTIN_PCOMGTUQ,
18958 IX86_BUILTIN_PCOMGEUQ,
18959 IX86_BUILTIN_PCOMFALSEUQ,
18960 IX86_BUILTIN_PCOMTRUEUQ,
18962 IX86_BUILTIN_PCOMEQB,
18963 IX86_BUILTIN_PCOMNEB,
18964 IX86_BUILTIN_PCOMLTB,
18965 IX86_BUILTIN_PCOMLEB,
18966 IX86_BUILTIN_PCOMGTB,
18967 IX86_BUILTIN_PCOMGEB,
18968 IX86_BUILTIN_PCOMFALSEB,
18969 IX86_BUILTIN_PCOMTRUEB,
18970 IX86_BUILTIN_PCOMEQW,
18971 IX86_BUILTIN_PCOMNEW,
18972 IX86_BUILTIN_PCOMLTW,
18973 IX86_BUILTIN_PCOMLEW,
18974 IX86_BUILTIN_PCOMGTW,
18975 IX86_BUILTIN_PCOMGEW,
18976 IX86_BUILTIN_PCOMFALSEW,
18977 IX86_BUILTIN_PCOMTRUEW,
18978 IX86_BUILTIN_PCOMEQD,
18979 IX86_BUILTIN_PCOMNED,
18980 IX86_BUILTIN_PCOMLTD,
18981 IX86_BUILTIN_PCOMLED,
18982 IX86_BUILTIN_PCOMGTD,
18983 IX86_BUILTIN_PCOMGED,
18984 IX86_BUILTIN_PCOMFALSED,
18985 IX86_BUILTIN_PCOMTRUED,
18986 IX86_BUILTIN_PCOMEQQ,
18987 IX86_BUILTIN_PCOMNEQ,
18988 IX86_BUILTIN_PCOMLTQ,
18989 IX86_BUILTIN_PCOMLEQ,
18990 IX86_BUILTIN_PCOMGTQ,
18991 IX86_BUILTIN_PCOMGEQ,
18992 IX86_BUILTIN_PCOMFALSEQ,
18993 IX86_BUILTIN_PCOMTRUEQ,
18998 /* Table for the ix86 builtin decls. */
18999 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19001 /* Table to record which ISA options the builtin needs. */
19002 static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19004 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19005 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19006 * function decl in the ix86_builtins array. Returns the function decl or
19007 * NULL_TREE, if the builtin was not added.
19009 * Record all builtins, even if it isn't an instruction set in the current ISA
19010 * in case the user uses function specific options for a different ISA. When
19011 * the builtin is expanded, check at that time whether it is valid. */
19014 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19016 tree decl = NULL_TREE;
19018 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19020 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
19022 ix86_builtins[(int) code] = decl;
19023 ix86_builtins_isa[(int) code] = mask;
19029 /* Like def_builtin, but also marks the function decl "const". */
19032 def_builtin_const (int mask, const char *name, tree type,
19033 enum ix86_builtins code)
19035 tree decl = def_builtin (mask, name, type, code);
19037 TREE_READONLY (decl) = 1;
19041 /* Bits for builtin_description.flag. */
19043 /* Set when we don't support the comparison natively, and should
19044 swap_comparison in order to support it. */
19045 #define BUILTIN_DESC_SWAP_OPERANDS 1
19047 struct builtin_description
19049 const unsigned int mask;
19050 const enum insn_code icode;
19051 const char *const name;
19052 const enum ix86_builtins code;
19053 const enum rtx_code comparison;
19057 static const struct builtin_description bdesc_comi[] =
19059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
19085 static const struct builtin_description bdesc_pcmpestr[] =
19088 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19089 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19090 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19091 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19092 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19093 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19094 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
19097 static const struct builtin_description bdesc_pcmpistr[] =
19100 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19101 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19102 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19103 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19104 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19105 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19106 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
19109 /* Special builtin types */
19110 enum ix86_special_builtin_type
19112 SPECIAL_FTYPE_UNKNOWN,
19114 V16QI_FTYPE_PCCHAR,
19115 V4SF_FTYPE_PCFLOAT,
19116 V2DF_FTYPE_PCDOUBLE,
19117 V4SF_FTYPE_V4SF_PCV2SF,
19118 V2DF_FTYPE_V2DF_PCDOUBLE,
19120 VOID_FTYPE_PV2SF_V4SF,
19121 VOID_FTYPE_PV2DI_V2DI,
19122 VOID_FTYPE_PCHAR_V16QI,
19123 VOID_FTYPE_PFLOAT_V4SF,
19124 VOID_FTYPE_PDOUBLE_V2DF,
19126 VOID_FTYPE_PINT_INT
19129 /* Builtin types */
19130 enum ix86_builtin_type
19133 FLOAT128_FTYPE_FLOAT128,
19135 FLOAT128_FTYPE_FLOAT128_FLOAT128,
19136 INT_FTYPE_V2DI_V2DI_PTEST,
19154 V4SF_FTYPE_V4SF_VEC_MERGE,
19162 V2DF_FTYPE_V2DF_VEC_MERGE,
19172 V16QI_FTYPE_V16QI_V16QI,
19173 V16QI_FTYPE_V8HI_V8HI,
19174 V8QI_FTYPE_V8QI_V8QI,
19175 V8QI_FTYPE_V4HI_V4HI,
19176 V8HI_FTYPE_V8HI_V8HI,
19177 V8HI_FTYPE_V8HI_V8HI_COUNT,
19178 V8HI_FTYPE_V16QI_V16QI,
19179 V8HI_FTYPE_V4SI_V4SI,
19180 V8HI_FTYPE_V8HI_SI_COUNT,
19181 V4SI_FTYPE_V4SI_V4SI,
19182 V4SI_FTYPE_V4SI_V4SI_COUNT,
19183 V4SI_FTYPE_V8HI_V8HI,
19184 V4SI_FTYPE_V4SF_V4SF,
19185 V4SI_FTYPE_V2DF_V2DF,
19186 V4SI_FTYPE_V4SI_SI_COUNT,
19187 V4HI_FTYPE_V4HI_V4HI,
19188 V4HI_FTYPE_V4HI_V4HI_COUNT,
19189 V4HI_FTYPE_V8QI_V8QI,
19190 V4HI_FTYPE_V2SI_V2SI,
19191 V4HI_FTYPE_V4HI_SI_COUNT,
19192 V4SF_FTYPE_V4SF_V4SF,
19193 V4SF_FTYPE_V4SF_V4SF_SWAP,
19194 V4SF_FTYPE_V4SF_V2SI,
19195 V4SF_FTYPE_V4SF_V2DF,
19196 V4SF_FTYPE_V4SF_DI,
19197 V4SF_FTYPE_V4SF_SI,
19198 V2DI_FTYPE_V2DI_V2DI,
19199 V2DI_FTYPE_V2DI_V2DI_COUNT,
19200 V2DI_FTYPE_V16QI_V16QI,
19201 V2DI_FTYPE_V4SI_V4SI,
19202 V2DI_FTYPE_V2DI_V16QI,
19203 V2DI_FTYPE_V2DF_V2DF,
19204 V2DI_FTYPE_V2DI_SI_COUNT,
19205 V2SI_FTYPE_V2SI_V2SI,
19206 V2SI_FTYPE_V2SI_V2SI_COUNT,
19207 V2SI_FTYPE_V4HI_V4HI,
19208 V2SI_FTYPE_V2SF_V2SF,
19209 V2SI_FTYPE_V2SI_SI_COUNT,
19210 V2DF_FTYPE_V2DF_V2DF,
19211 V2DF_FTYPE_V2DF_V2DF_SWAP,
19212 V2DF_FTYPE_V2DF_V4SF,
19213 V2DF_FTYPE_V2DF_DI,
19214 V2DF_FTYPE_V2DF_SI,
19215 V2SF_FTYPE_V2SF_V2SF,
19216 V1DI_FTYPE_V1DI_V1DI,
19217 V1DI_FTYPE_V1DI_V1DI_COUNT,
19218 V1DI_FTYPE_V8QI_V8QI,
19219 V1DI_FTYPE_V2SI_V2SI,
19220 V1DI_FTYPE_V1DI_SI_COUNT,
19221 UINT64_FTYPE_UINT64_UINT64,
19222 UINT_FTYPE_UINT_UINT,
19223 UINT_FTYPE_UINT_USHORT,
19224 UINT_FTYPE_UINT_UCHAR,
19225 V8HI_FTYPE_V8HI_INT,
19226 V4SI_FTYPE_V4SI_INT,
19227 V4HI_FTYPE_V4HI_INT,
19228 V4SF_FTYPE_V4SF_INT,
19229 V2DI_FTYPE_V2DI_INT,
19230 V2DI2TI_FTYPE_V2DI_INT,
19231 V2DF_FTYPE_V2DF_INT,
19232 V16QI_FTYPE_V16QI_V16QI_V16QI,
19233 V4SF_FTYPE_V4SF_V4SF_V4SF,
19234 V2DF_FTYPE_V2DF_V2DF_V2DF,
19235 V16QI_FTYPE_V16QI_V16QI_INT,
19236 V8HI_FTYPE_V8HI_V8HI_INT,
19237 V4SI_FTYPE_V4SI_V4SI_INT,
19238 V4SF_FTYPE_V4SF_V4SF_INT,
19239 V2DI_FTYPE_V2DI_V2DI_INT,
19240 V2DI2TI_FTYPE_V2DI_V2DI_INT,
19241 V1DI2DI_FTYPE_V1DI_V1DI_INT,
19242 V2DF_FTYPE_V2DF_V2DF_INT,
19243 V2DI_FTYPE_V2DI_UINT_UINT,
19244 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
19247 /* Special builtins with variable number of arguments. */
19248 static const struct builtin_description bdesc_special_args[] =
19251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
19257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
19261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
19263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
19266 /* SSE or 3DNow!A */
19267 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19268 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
19271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
19273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19274 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
19275 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
19277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
19278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
19279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19281 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
19285 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
19288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
19291 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
19292 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
19295 /* Builtins with variable number of arguments. */
19296 static const struct builtin_description bdesc_args[] =
19299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19330 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
19335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
19337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
19339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
19341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
19343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
19353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
19357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
19358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
19359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
19360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
19363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19364 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19365 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19370 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19371 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19372 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19373 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
19374 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19375 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19377 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19379 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19380 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19381 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19382 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19385 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
19386 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
19387 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19388 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
19389 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19390 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
19393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
19394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19395 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19397 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19401 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
19403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
19404 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
19406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19408 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19409 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19410 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19424 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19426 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
19428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
19430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
19431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
19432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
19434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
19435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
19436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
19438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
19440 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19441 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19445 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19446 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19447 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19448 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
19457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
19458 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
19460 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
19462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
19466 /* SSE MMX or 3Dnow!A */
19467 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19468 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19469 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19471 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19472 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19473 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19476 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
19477 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
19479 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
19482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
19485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
19486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
19487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
19488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
19490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
19493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
19494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
19496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
19498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
19500 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19501 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
19503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
19505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
19507 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19508 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19509 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19510 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
19521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
19527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
19529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
19530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
19531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
19533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
19534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
19535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
19537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19538 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19542 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19544 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19545 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19551 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
19553 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19554 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19555 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19556 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19557 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19558 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19559 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19560 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19571 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19572 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
19574 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19576 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19577 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19589 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19590 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19591 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
19607 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
19610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
19611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
19615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
19616 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
19617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
19618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
19620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19621 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19622 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19623 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19624 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19625 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19626 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
19629 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19630 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19631 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
19632 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19633 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19634 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
19636 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
19637 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
19638 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
19639 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
19641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
19642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
19645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
19647 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
19648 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
19651 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19652 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
19655 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
19656 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
19658 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19659 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19660 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19661 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19662 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
19663 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
19666 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
19667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
19668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
19670 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
19671 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
19673 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19675 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19677 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19682 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19683 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
19686 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
19687 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19688 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19689 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19690 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19691 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19692 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
19693 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19694 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
19695 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19696 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
19699 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
19700 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
19703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
19706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
19707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
19711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
19712 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
19714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19717 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19718 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
19721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
19722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
19723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
19724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
19725 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
19726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
19728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
19729 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19730 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19731 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19732 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19733 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19734 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
19735 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
19738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
19739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
19741 /* SSE4.1 and SSE5 */
19742 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
19743 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
19744 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
19745 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
19747 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19748 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19749 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
19752 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19753 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
19754 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
19755 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
19756 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
19759 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
19760 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
19761 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
19762 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19765 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
19766 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
19768 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19769 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19770 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19771 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
19774 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
19778 enum multi_arg_type {
19788 MULTI_ARG_3_PERMPS,
19789 MULTI_ARG_3_PERMPD,
19796 MULTI_ARG_2_DI_IMM,
19797 MULTI_ARG_2_SI_IMM,
19798 MULTI_ARG_2_HI_IMM,
19799 MULTI_ARG_2_QI_IMM,
19800 MULTI_ARG_2_SF_CMP,
19801 MULTI_ARG_2_DF_CMP,
19802 MULTI_ARG_2_DI_CMP,
19803 MULTI_ARG_2_SI_CMP,
19804 MULTI_ARG_2_HI_CMP,
19805 MULTI_ARG_2_QI_CMP,
19828 static const struct builtin_description bdesc_multi_arg[] =
19830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
19831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
19832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
19833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
19834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
19835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
19836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
19837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
19838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
19839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
19840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
19841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
19842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
19843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
19844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
19845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
19846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
19847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
19848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
19849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
19850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
19851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
19852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
19853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
19854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
19855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
19856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
19857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
19858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
19859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
19860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
19861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
19862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
19863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
19864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
19865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
19866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
19867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
19868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
19869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
19870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
19871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
19872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
19873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
19874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
19875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
19876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
19877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
19878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
19879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
19880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
19881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
19882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
19883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
19884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
19885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
19886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
19887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
19888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
19889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
19890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
19891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
19892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
19893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
19894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
19895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
19896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
19897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
19898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
19899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
19900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
19901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
19902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
19903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
19904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
19906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
19907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
19908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
19909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
19910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
19911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
19912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
19913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
19914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
19917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
19918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
19919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
19920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
19924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
19925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
19926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
19927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
19928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
19929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
19930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
19941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
19944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
19945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
19946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
19947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
19948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
19951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
19952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
19953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
19954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
20002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
20003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
20004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
20006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
20007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
20010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
20011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
20012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
20014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
20015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
20018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
20019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
20020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
20022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
20023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
20026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
20027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
20028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
20030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
20034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
20035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
20036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
20038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
20039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
20040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
20041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
20042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
20043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
20044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
20045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
20047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
20061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
20062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
20063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
20066 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
20067 in the current target ISA to allow the user to compile particular modules
20068 with different target specific options that differ from the command line
20071 ix86_init_mmx_sse_builtins (void)
20073 const struct builtin_description * d;
20076 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
20077 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20078 tree V1DI_type_node
20079 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
20080 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
20081 tree V2DI_type_node
20082 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
20083 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
20084 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
20085 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
20086 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20087 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
20088 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
20090 tree pchar_type_node = build_pointer_type (char_type_node);
20091 tree pcchar_type_node
20092 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
20093 tree pfloat_type_node = build_pointer_type (float_type_node);
20094 tree pcfloat_type_node
20095 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
20096 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
20097 tree pcv2sf_type_node
20098 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
20099 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
20100 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
20103 tree int_ftype_v4sf_v4sf
20104 = build_function_type_list (integer_type_node,
20105 V4SF_type_node, V4SF_type_node, NULL_TREE);
20106 tree v4si_ftype_v4sf_v4sf
20107 = build_function_type_list (V4SI_type_node,
20108 V4SF_type_node, V4SF_type_node, NULL_TREE);
20109 /* MMX/SSE/integer conversions. */
20110 tree int_ftype_v4sf
20111 = build_function_type_list (integer_type_node,
20112 V4SF_type_node, NULL_TREE);
20113 tree int64_ftype_v4sf
20114 = build_function_type_list (long_long_integer_type_node,
20115 V4SF_type_node, NULL_TREE);
20116 tree int_ftype_v8qi
20117 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
20118 tree v4sf_ftype_v4sf_int
20119 = build_function_type_list (V4SF_type_node,
20120 V4SF_type_node, integer_type_node, NULL_TREE);
20121 tree v4sf_ftype_v4sf_int64
20122 = build_function_type_list (V4SF_type_node,
20123 V4SF_type_node, long_long_integer_type_node,
20125 tree v4sf_ftype_v4sf_v2si
20126 = build_function_type_list (V4SF_type_node,
20127 V4SF_type_node, V2SI_type_node, NULL_TREE);
20129 /* Miscellaneous. */
20130 tree v8qi_ftype_v4hi_v4hi
20131 = build_function_type_list (V8QI_type_node,
20132 V4HI_type_node, V4HI_type_node, NULL_TREE);
20133 tree v4hi_ftype_v2si_v2si
20134 = build_function_type_list (V4HI_type_node,
20135 V2SI_type_node, V2SI_type_node, NULL_TREE);
20136 tree v4sf_ftype_v4sf_v4sf_int
20137 = build_function_type_list (V4SF_type_node,
20138 V4SF_type_node, V4SF_type_node,
20139 integer_type_node, NULL_TREE);
20140 tree v2si_ftype_v4hi_v4hi
20141 = build_function_type_list (V2SI_type_node,
20142 V4HI_type_node, V4HI_type_node, NULL_TREE);
20143 tree v4hi_ftype_v4hi_int
20144 = build_function_type_list (V4HI_type_node,
20145 V4HI_type_node, integer_type_node, NULL_TREE);
20146 tree v2si_ftype_v2si_int
20147 = build_function_type_list (V2SI_type_node,
20148 V2SI_type_node, integer_type_node, NULL_TREE);
20149 tree v1di_ftype_v1di_int
20150 = build_function_type_list (V1DI_type_node,
20151 V1DI_type_node, integer_type_node, NULL_TREE);
20153 tree void_ftype_void
20154 = build_function_type (void_type_node, void_list_node);
20155 tree void_ftype_unsigned
20156 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
20157 tree void_ftype_unsigned_unsigned
20158 = build_function_type_list (void_type_node, unsigned_type_node,
20159 unsigned_type_node, NULL_TREE);
20160 tree void_ftype_pcvoid_unsigned_unsigned
20161 = build_function_type_list (void_type_node, const_ptr_type_node,
20162 unsigned_type_node, unsigned_type_node,
20164 tree unsigned_ftype_void
20165 = build_function_type (unsigned_type_node, void_list_node);
20166 tree v2si_ftype_v4sf
20167 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
20168 /* Loads/stores. */
20169 tree void_ftype_v8qi_v8qi_pchar
20170 = build_function_type_list (void_type_node,
20171 V8QI_type_node, V8QI_type_node,
20172 pchar_type_node, NULL_TREE);
20173 tree v4sf_ftype_pcfloat
20174 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
20175 tree v4sf_ftype_v4sf_pcv2sf
20176 = build_function_type_list (V4SF_type_node,
20177 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
20178 tree void_ftype_pv2sf_v4sf
20179 = build_function_type_list (void_type_node,
20180 pv2sf_type_node, V4SF_type_node, NULL_TREE);
20181 tree void_ftype_pfloat_v4sf
20182 = build_function_type_list (void_type_node,
20183 pfloat_type_node, V4SF_type_node, NULL_TREE);
20184 tree void_ftype_pdi_di
20185 = build_function_type_list (void_type_node,
20186 pdi_type_node, long_long_unsigned_type_node,
20188 tree void_ftype_pv2di_v2di
20189 = build_function_type_list (void_type_node,
20190 pv2di_type_node, V2DI_type_node, NULL_TREE);
20191 /* Normal vector unops. */
20192 tree v4sf_ftype_v4sf
20193 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
20194 tree v16qi_ftype_v16qi
20195 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
20196 tree v8hi_ftype_v8hi
20197 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
20198 tree v4si_ftype_v4si
20199 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
20200 tree v8qi_ftype_v8qi
20201 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
20202 tree v4hi_ftype_v4hi
20203 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
20205 /* Normal vector binops. */
20206 tree v4sf_ftype_v4sf_v4sf
20207 = build_function_type_list (V4SF_type_node,
20208 V4SF_type_node, V4SF_type_node, NULL_TREE);
20209 tree v8qi_ftype_v8qi_v8qi
20210 = build_function_type_list (V8QI_type_node,
20211 V8QI_type_node, V8QI_type_node, NULL_TREE);
20212 tree v4hi_ftype_v4hi_v4hi
20213 = build_function_type_list (V4HI_type_node,
20214 V4HI_type_node, V4HI_type_node, NULL_TREE);
20215 tree v2si_ftype_v2si_v2si
20216 = build_function_type_list (V2SI_type_node,
20217 V2SI_type_node, V2SI_type_node, NULL_TREE);
20218 tree v1di_ftype_v1di_v1di
20219 = build_function_type_list (V1DI_type_node,
20220 V1DI_type_node, V1DI_type_node, NULL_TREE);
20221 tree v1di_ftype_v1di_v1di_int
20222 = build_function_type_list (V1DI_type_node,
20223 V1DI_type_node, V1DI_type_node,
20224 integer_type_node, NULL_TREE);
20225 tree v2si_ftype_v2sf
20226 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
20227 tree v2sf_ftype_v2si
20228 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
20229 tree v2si_ftype_v2si
20230 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
20231 tree v2sf_ftype_v2sf
20232 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
20233 tree v2sf_ftype_v2sf_v2sf
20234 = build_function_type_list (V2SF_type_node,
20235 V2SF_type_node, V2SF_type_node, NULL_TREE);
20236 tree v2si_ftype_v2sf_v2sf
20237 = build_function_type_list (V2SI_type_node,
20238 V2SF_type_node, V2SF_type_node, NULL_TREE);
20239 tree pint_type_node = build_pointer_type (integer_type_node);
20240 tree pdouble_type_node = build_pointer_type (double_type_node);
20241 tree pcdouble_type_node = build_pointer_type (
20242 build_type_variant (double_type_node, 1, 0));
20243 tree int_ftype_v2df_v2df
20244 = build_function_type_list (integer_type_node,
20245 V2DF_type_node, V2DF_type_node, NULL_TREE);
20247 tree void_ftype_pcvoid
20248 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
20249 tree v4sf_ftype_v4si
20250 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
20251 tree v4si_ftype_v4sf
20252 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
20253 tree v2df_ftype_v4si
20254 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
20255 tree v4si_ftype_v2df
20256 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
20257 tree v4si_ftype_v2df_v2df
20258 = build_function_type_list (V4SI_type_node,
20259 V2DF_type_node, V2DF_type_node, NULL_TREE);
20260 tree v2si_ftype_v2df
20261 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
20262 tree v4sf_ftype_v2df
20263 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
20264 tree v2df_ftype_v2si
20265 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
20266 tree v2df_ftype_v4sf
20267 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
20268 tree int_ftype_v2df
20269 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
20270 tree int64_ftype_v2df
20271 = build_function_type_list (long_long_integer_type_node,
20272 V2DF_type_node, NULL_TREE);
20273 tree v2df_ftype_v2df_int
20274 = build_function_type_list (V2DF_type_node,
20275 V2DF_type_node, integer_type_node, NULL_TREE);
20276 tree v2df_ftype_v2df_int64
20277 = build_function_type_list (V2DF_type_node,
20278 V2DF_type_node, long_long_integer_type_node,
20280 tree v4sf_ftype_v4sf_v2df
20281 = build_function_type_list (V4SF_type_node,
20282 V4SF_type_node, V2DF_type_node, NULL_TREE);
20283 tree v2df_ftype_v2df_v4sf
20284 = build_function_type_list (V2DF_type_node,
20285 V2DF_type_node, V4SF_type_node, NULL_TREE);
20286 tree v2df_ftype_v2df_v2df_int
20287 = build_function_type_list (V2DF_type_node,
20288 V2DF_type_node, V2DF_type_node,
20291 tree v2df_ftype_v2df_pcdouble
20292 = build_function_type_list (V2DF_type_node,
20293 V2DF_type_node, pcdouble_type_node, NULL_TREE);
20294 tree void_ftype_pdouble_v2df
20295 = build_function_type_list (void_type_node,
20296 pdouble_type_node, V2DF_type_node, NULL_TREE);
20297 tree void_ftype_pint_int
20298 = build_function_type_list (void_type_node,
20299 pint_type_node, integer_type_node, NULL_TREE);
20300 tree void_ftype_v16qi_v16qi_pchar
20301 = build_function_type_list (void_type_node,
20302 V16QI_type_node, V16QI_type_node,
20303 pchar_type_node, NULL_TREE);
20304 tree v2df_ftype_pcdouble
20305 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
20306 tree v2df_ftype_v2df_v2df
20307 = build_function_type_list (V2DF_type_node,
20308 V2DF_type_node, V2DF_type_node, NULL_TREE);
20309 tree v16qi_ftype_v16qi_v16qi
20310 = build_function_type_list (V16QI_type_node,
20311 V16QI_type_node, V16QI_type_node, NULL_TREE);
20312 tree v8hi_ftype_v8hi_v8hi
20313 = build_function_type_list (V8HI_type_node,
20314 V8HI_type_node, V8HI_type_node, NULL_TREE);
20315 tree v4si_ftype_v4si_v4si
20316 = build_function_type_list (V4SI_type_node,
20317 V4SI_type_node, V4SI_type_node, NULL_TREE);
20318 tree v2di_ftype_v2di_v2di
20319 = build_function_type_list (V2DI_type_node,
20320 V2DI_type_node, V2DI_type_node, NULL_TREE);
20321 tree v2di_ftype_v2df_v2df
20322 = build_function_type_list (V2DI_type_node,
20323 V2DF_type_node, V2DF_type_node, NULL_TREE);
20324 tree v2df_ftype_v2df
20325 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
20326 tree v2di_ftype_v2di_int
20327 = build_function_type_list (V2DI_type_node,
20328 V2DI_type_node, integer_type_node, NULL_TREE);
20329 tree v2di_ftype_v2di_v2di_int
20330 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20331 V2DI_type_node, integer_type_node, NULL_TREE);
20332 tree v4si_ftype_v4si_int
20333 = build_function_type_list (V4SI_type_node,
20334 V4SI_type_node, integer_type_node, NULL_TREE);
20335 tree v8hi_ftype_v8hi_int
20336 = build_function_type_list (V8HI_type_node,
20337 V8HI_type_node, integer_type_node, NULL_TREE);
20338 tree v4si_ftype_v8hi_v8hi
20339 = build_function_type_list (V4SI_type_node,
20340 V8HI_type_node, V8HI_type_node, NULL_TREE);
20341 tree v1di_ftype_v8qi_v8qi
20342 = build_function_type_list (V1DI_type_node,
20343 V8QI_type_node, V8QI_type_node, NULL_TREE);
20344 tree v1di_ftype_v2si_v2si
20345 = build_function_type_list (V1DI_type_node,
20346 V2SI_type_node, V2SI_type_node, NULL_TREE);
20347 tree v2di_ftype_v16qi_v16qi
20348 = build_function_type_list (V2DI_type_node,
20349 V16QI_type_node, V16QI_type_node, NULL_TREE);
20350 tree v2di_ftype_v4si_v4si
20351 = build_function_type_list (V2DI_type_node,
20352 V4SI_type_node, V4SI_type_node, NULL_TREE);
20353 tree int_ftype_v16qi
20354 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
20355 tree v16qi_ftype_pcchar
20356 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
20357 tree void_ftype_pchar_v16qi
20358 = build_function_type_list (void_type_node,
20359 pchar_type_node, V16QI_type_node, NULL_TREE);
20361 tree v2di_ftype_v2di_unsigned_unsigned
20362 = build_function_type_list (V2DI_type_node, V2DI_type_node,
20363 unsigned_type_node, unsigned_type_node,
20365 tree v2di_ftype_v2di_v2di_unsigned_unsigned
20366 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
20367 unsigned_type_node, unsigned_type_node,
20369 tree v2di_ftype_v2di_v16qi
20370 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
20372 tree v2df_ftype_v2df_v2df_v2df
20373 = build_function_type_list (V2DF_type_node,
20374 V2DF_type_node, V2DF_type_node,
20375 V2DF_type_node, NULL_TREE);
20376 tree v4sf_ftype_v4sf_v4sf_v4sf
20377 = build_function_type_list (V4SF_type_node,
20378 V4SF_type_node, V4SF_type_node,
20379 V4SF_type_node, NULL_TREE);
20380 tree v8hi_ftype_v16qi
20381 = build_function_type_list (V8HI_type_node, V16QI_type_node,
20383 tree v4si_ftype_v16qi
20384 = build_function_type_list (V4SI_type_node, V16QI_type_node,
20386 tree v2di_ftype_v16qi
20387 = build_function_type_list (V2DI_type_node, V16QI_type_node,
20389 tree v4si_ftype_v8hi
20390 = build_function_type_list (V4SI_type_node, V8HI_type_node,
20392 tree v2di_ftype_v8hi
20393 = build_function_type_list (V2DI_type_node, V8HI_type_node,
20395 tree v2di_ftype_v4si
20396 = build_function_type_list (V2DI_type_node, V4SI_type_node,
20398 tree v2di_ftype_pv2di
20399 = build_function_type_list (V2DI_type_node, pv2di_type_node,
20401 tree v16qi_ftype_v16qi_v16qi_int
20402 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20403 V16QI_type_node, integer_type_node,
20405 tree v16qi_ftype_v16qi_v16qi_v16qi
20406 = build_function_type_list (V16QI_type_node, V16QI_type_node,
20407 V16QI_type_node, V16QI_type_node,
20409 tree v8hi_ftype_v8hi_v8hi_int
20410 = build_function_type_list (V8HI_type_node, V8HI_type_node,
20411 V8HI_type_node, integer_type_node,
20413 tree v4si_ftype_v4si_v4si_int
20414 = build_function_type_list (V4SI_type_node, V4SI_type_node,
20415 V4SI_type_node, integer_type_node,
20417 tree int_ftype_v2di_v2di
20418 = build_function_type_list (integer_type_node,
20419 V2DI_type_node, V2DI_type_node,
20421 tree int_ftype_v16qi_int_v16qi_int_int
20422 = build_function_type_list (integer_type_node,
20429 tree v16qi_ftype_v16qi_int_v16qi_int_int
20430 = build_function_type_list (V16QI_type_node,
20437 tree int_ftype_v16qi_v16qi_int
20438 = build_function_type_list (integer_type_node,
20444 /* SSE5 instructions */
20445 tree v2di_ftype_v2di_v2di_v2di
20446 = build_function_type_list (V2DI_type_node,
20452 tree v4si_ftype_v4si_v4si_v4si
20453 = build_function_type_list (V4SI_type_node,
20459 tree v4si_ftype_v4si_v4si_v2di
20460 = build_function_type_list (V4SI_type_node,
20466 tree v8hi_ftype_v8hi_v8hi_v8hi
20467 = build_function_type_list (V8HI_type_node,
20473 tree v8hi_ftype_v8hi_v8hi_v4si
20474 = build_function_type_list (V8HI_type_node,
20480 tree v2df_ftype_v2df_v2df_v16qi
20481 = build_function_type_list (V2DF_type_node,
20487 tree v4sf_ftype_v4sf_v4sf_v16qi
20488 = build_function_type_list (V4SF_type_node,
20494 tree v2di_ftype_v2di_si
20495 = build_function_type_list (V2DI_type_node,
20500 tree v4si_ftype_v4si_si
20501 = build_function_type_list (V4SI_type_node,
20506 tree v8hi_ftype_v8hi_si
20507 = build_function_type_list (V8HI_type_node,
20512 tree v16qi_ftype_v16qi_si
20513 = build_function_type_list (V16QI_type_node,
20517 tree v4sf_ftype_v4hi
20518 = build_function_type_list (V4SF_type_node,
20522 tree v4hi_ftype_v4sf
20523 = build_function_type_list (V4HI_type_node,
20527 tree v2di_ftype_v2di
20528 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
20530 tree v16qi_ftype_v8hi_v8hi
20531 = build_function_type_list (V16QI_type_node,
20532 V8HI_type_node, V8HI_type_node,
20534 tree v8hi_ftype_v4si_v4si
20535 = build_function_type_list (V8HI_type_node,
20536 V4SI_type_node, V4SI_type_node,
20538 tree v8hi_ftype_v16qi_v16qi
20539 = build_function_type_list (V8HI_type_node,
20540 V16QI_type_node, V16QI_type_node,
20542 tree v4hi_ftype_v8qi_v8qi
20543 = build_function_type_list (V4HI_type_node,
20544 V8QI_type_node, V8QI_type_node,
20546 tree unsigned_ftype_unsigned_uchar
20547 = build_function_type_list (unsigned_type_node,
20548 unsigned_type_node,
20549 unsigned_char_type_node,
20551 tree unsigned_ftype_unsigned_ushort
20552 = build_function_type_list (unsigned_type_node,
20553 unsigned_type_node,
20554 short_unsigned_type_node,
20556 tree unsigned_ftype_unsigned_unsigned
20557 = build_function_type_list (unsigned_type_node,
20558 unsigned_type_node,
20559 unsigned_type_node,
20561 tree uint64_ftype_uint64_uint64
20562 = build_function_type_list (long_long_unsigned_type_node,
20563 long_long_unsigned_type_node,
20564 long_long_unsigned_type_node,
20566 tree float_ftype_float
20567 = build_function_type_list (float_type_node,
20573 /* Add all special builtins with variable number of operands. */
20574 for (i = 0, d = bdesc_special_args;
20575 i < ARRAY_SIZE (bdesc_special_args);
20583 switch ((enum ix86_special_builtin_type) d->flag)
20585 case VOID_FTYPE_VOID:
20586 type = void_ftype_void;
20588 case V16QI_FTYPE_PCCHAR:
20589 type = v16qi_ftype_pcchar;
20591 case V4SF_FTYPE_PCFLOAT:
20592 type = v4sf_ftype_pcfloat;
20594 case V2DI_FTYPE_PV2DI:
20595 type = v2di_ftype_pv2di;
20597 case V2DF_FTYPE_PCDOUBLE:
20598 type = v2df_ftype_pcdouble;
20600 case V4SF_FTYPE_V4SF_PCV2SF:
20601 type = v4sf_ftype_v4sf_pcv2sf;
20603 case V2DF_FTYPE_V2DF_PCDOUBLE:
20604 type = v2df_ftype_v2df_pcdouble;
20606 case VOID_FTYPE_PV2SF_V4SF:
20607 type = void_ftype_pv2sf_v4sf;
20609 case VOID_FTYPE_PV2DI_V2DI:
20610 type = void_ftype_pv2di_v2di;
20612 case VOID_FTYPE_PCHAR_V16QI:
20613 type = void_ftype_pchar_v16qi;
20615 case VOID_FTYPE_PFLOAT_V4SF:
20616 type = void_ftype_pfloat_v4sf;
20618 case VOID_FTYPE_PDOUBLE_V2DF:
20619 type = void_ftype_pdouble_v2df;
20621 case VOID_FTYPE_PDI_DI:
20622 type = void_ftype_pdi_di;
20624 case VOID_FTYPE_PINT_INT:
20625 type = void_ftype_pint_int;
20628 gcc_unreachable ();
20631 def_builtin (d->mask, d->name, type, d->code);
20634 /* Add all builtins with variable number of operands. */
20635 for (i = 0, d = bdesc_args;
20636 i < ARRAY_SIZE (bdesc_args);
20644 switch ((enum ix86_builtin_type) d->flag)
20646 case FLOAT_FTYPE_FLOAT:
20647 type = float_ftype_float;
20649 case INT_FTYPE_V2DI_V2DI_PTEST:
20650 type = int_ftype_v2di_v2di;
20652 case INT64_FTYPE_V4SF:
20653 type = int64_ftype_v4sf;
20655 case INT64_FTYPE_V2DF:
20656 type = int64_ftype_v2df;
20658 case INT_FTYPE_V16QI:
20659 type = int_ftype_v16qi;
20661 case INT_FTYPE_V8QI:
20662 type = int_ftype_v8qi;
20664 case INT_FTYPE_V4SF:
20665 type = int_ftype_v4sf;
20667 case INT_FTYPE_V2DF:
20668 type = int_ftype_v2df;
20670 case V16QI_FTYPE_V16QI:
20671 type = v16qi_ftype_v16qi;
20673 case V8HI_FTYPE_V8HI:
20674 type = v8hi_ftype_v8hi;
20676 case V8HI_FTYPE_V16QI:
20677 type = v8hi_ftype_v16qi;
20679 case V8QI_FTYPE_V8QI:
20680 type = v8qi_ftype_v8qi;
20682 case V4SI_FTYPE_V4SI:
20683 type = v4si_ftype_v4si;
20685 case V4SI_FTYPE_V16QI:
20686 type = v4si_ftype_v16qi;
20688 case V4SI_FTYPE_V8HI:
20689 type = v4si_ftype_v8hi;
20691 case V4SI_FTYPE_V4SF:
20692 type = v4si_ftype_v4sf;
20694 case V4SI_FTYPE_V2DF:
20695 type = v4si_ftype_v2df;
20697 case V4HI_FTYPE_V4HI:
20698 type = v4hi_ftype_v4hi;
20700 case V4SF_FTYPE_V4SF:
20701 case V4SF_FTYPE_V4SF_VEC_MERGE:
20702 type = v4sf_ftype_v4sf;
20704 case V4SF_FTYPE_V4SI:
20705 type = v4sf_ftype_v4si;
20707 case V4SF_FTYPE_V2DF:
20708 type = v4sf_ftype_v2df;
20710 case V2DI_FTYPE_V2DI:
20711 type = v2di_ftype_v2di;
20713 case V2DI_FTYPE_V16QI:
20714 type = v2di_ftype_v16qi;
20716 case V2DI_FTYPE_V8HI:
20717 type = v2di_ftype_v8hi;
20719 case V2DI_FTYPE_V4SI:
20720 type = v2di_ftype_v4si;
20722 case V2SI_FTYPE_V2SI:
20723 type = v2si_ftype_v2si;
20725 case V2SI_FTYPE_V4SF:
20726 type = v2si_ftype_v4sf;
20728 case V2SI_FTYPE_V2DF:
20729 type = v2si_ftype_v2df;
20731 case V2SI_FTYPE_V2SF:
20732 type = v2si_ftype_v2sf;
20734 case V2DF_FTYPE_V4SF:
20735 type = v2df_ftype_v4sf;
20737 case V2DF_FTYPE_V2DF:
20738 case V2DF_FTYPE_V2DF_VEC_MERGE:
20739 type = v2df_ftype_v2df;
20741 case V2DF_FTYPE_V2SI:
20742 type = v2df_ftype_v2si;
20744 case V2DF_FTYPE_V4SI:
20745 type = v2df_ftype_v4si;
20747 case V2SF_FTYPE_V2SF:
20748 type = v2sf_ftype_v2sf;
20750 case V2SF_FTYPE_V2SI:
20751 type = v2sf_ftype_v2si;
20753 case V16QI_FTYPE_V16QI_V16QI:
20754 type = v16qi_ftype_v16qi_v16qi;
20756 case V16QI_FTYPE_V8HI_V8HI:
20757 type = v16qi_ftype_v8hi_v8hi;
20759 case V8QI_FTYPE_V8QI_V8QI:
20760 type = v8qi_ftype_v8qi_v8qi;
20762 case V8QI_FTYPE_V4HI_V4HI:
20763 type = v8qi_ftype_v4hi_v4hi;
20765 case V8HI_FTYPE_V8HI_V8HI:
20766 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20767 type = v8hi_ftype_v8hi_v8hi;
20769 case V8HI_FTYPE_V16QI_V16QI:
20770 type = v8hi_ftype_v16qi_v16qi;
20772 case V8HI_FTYPE_V4SI_V4SI:
20773 type = v8hi_ftype_v4si_v4si;
20775 case V8HI_FTYPE_V8HI_SI_COUNT:
20776 type = v8hi_ftype_v8hi_int;
20778 case V4SI_FTYPE_V4SI_V4SI:
20779 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20780 type = v4si_ftype_v4si_v4si;
20782 case V4SI_FTYPE_V8HI_V8HI:
20783 type = v4si_ftype_v8hi_v8hi;
20785 case V4SI_FTYPE_V4SF_V4SF:
20786 type = v4si_ftype_v4sf_v4sf;
20788 case V4SI_FTYPE_V2DF_V2DF:
20789 type = v4si_ftype_v2df_v2df;
20791 case V4SI_FTYPE_V4SI_SI_COUNT:
20792 type = v4si_ftype_v4si_int;
20794 case V4HI_FTYPE_V4HI_V4HI:
20795 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20796 type = v4hi_ftype_v4hi_v4hi;
20798 case V4HI_FTYPE_V8QI_V8QI:
20799 type = v4hi_ftype_v8qi_v8qi;
20801 case V4HI_FTYPE_V2SI_V2SI:
20802 type = v4hi_ftype_v2si_v2si;
20804 case V4HI_FTYPE_V4HI_SI_COUNT:
20805 type = v4hi_ftype_v4hi_int;
20807 case V4SF_FTYPE_V4SF_V4SF:
20808 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20809 type = v4sf_ftype_v4sf_v4sf;
20811 case V4SF_FTYPE_V4SF_V2SI:
20812 type = v4sf_ftype_v4sf_v2si;
20814 case V4SF_FTYPE_V4SF_V2DF:
20815 type = v4sf_ftype_v4sf_v2df;
20817 case V4SF_FTYPE_V4SF_DI:
20818 type = v4sf_ftype_v4sf_int64;
20820 case V4SF_FTYPE_V4SF_SI:
20821 type = v4sf_ftype_v4sf_int;
20823 case V2DI_FTYPE_V2DI_V2DI:
20824 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20825 type = v2di_ftype_v2di_v2di;
20827 case V2DI_FTYPE_V16QI_V16QI:
20828 type = v2di_ftype_v16qi_v16qi;
20830 case V2DI_FTYPE_V4SI_V4SI:
20831 type = v2di_ftype_v4si_v4si;
20833 case V2DI_FTYPE_V2DI_V16QI:
20834 type = v2di_ftype_v2di_v16qi;
20836 case V2DI_FTYPE_V2DF_V2DF:
20837 type = v2di_ftype_v2df_v2df;
20839 case V2DI_FTYPE_V2DI_SI_COUNT:
20840 type = v2di_ftype_v2di_int;
20842 case V2SI_FTYPE_V2SI_V2SI:
20843 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20844 type = v2si_ftype_v2si_v2si;
20846 case V2SI_FTYPE_V4HI_V4HI:
20847 type = v2si_ftype_v4hi_v4hi;
20849 case V2SI_FTYPE_V2SF_V2SF:
20850 type = v2si_ftype_v2sf_v2sf;
20852 case V2SI_FTYPE_V2SI_SI_COUNT:
20853 type = v2si_ftype_v2si_int;
20855 case V2DF_FTYPE_V2DF_V2DF:
20856 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20857 type = v2df_ftype_v2df_v2df;
20859 case V2DF_FTYPE_V2DF_V4SF:
20860 type = v2df_ftype_v2df_v4sf;
20862 case V2DF_FTYPE_V2DF_DI:
20863 type = v2df_ftype_v2df_int64;
20865 case V2DF_FTYPE_V2DF_SI:
20866 type = v2df_ftype_v2df_int;
20868 case V2SF_FTYPE_V2SF_V2SF:
20869 type = v2sf_ftype_v2sf_v2sf;
20871 case V1DI_FTYPE_V1DI_V1DI:
20872 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20873 type = v1di_ftype_v1di_v1di;
20875 case V1DI_FTYPE_V8QI_V8QI:
20876 type = v1di_ftype_v8qi_v8qi;
20878 case V1DI_FTYPE_V2SI_V2SI:
20879 type = v1di_ftype_v2si_v2si;
20881 case V1DI_FTYPE_V1DI_SI_COUNT:
20882 type = v1di_ftype_v1di_int;
20884 case UINT64_FTYPE_UINT64_UINT64:
20885 type = uint64_ftype_uint64_uint64;
20887 case UINT_FTYPE_UINT_UINT:
20888 type = unsigned_ftype_unsigned_unsigned;
20890 case UINT_FTYPE_UINT_USHORT:
20891 type = unsigned_ftype_unsigned_ushort;
20893 case UINT_FTYPE_UINT_UCHAR:
20894 type = unsigned_ftype_unsigned_uchar;
20896 case V8HI_FTYPE_V8HI_INT:
20897 type = v8hi_ftype_v8hi_int;
20899 case V4SI_FTYPE_V4SI_INT:
20900 type = v4si_ftype_v4si_int;
20902 case V4HI_FTYPE_V4HI_INT:
20903 type = v4hi_ftype_v4hi_int;
20905 case V4SF_FTYPE_V4SF_INT:
20906 type = v4sf_ftype_v4sf_int;
20908 case V2DI_FTYPE_V2DI_INT:
20909 case V2DI2TI_FTYPE_V2DI_INT:
20910 type = v2di_ftype_v2di_int;
20912 case V2DF_FTYPE_V2DF_INT:
20913 type = v2df_ftype_v2df_int;
20915 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20916 type = v16qi_ftype_v16qi_v16qi_v16qi;
20918 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20919 type = v4sf_ftype_v4sf_v4sf_v4sf;
20921 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20922 type = v2df_ftype_v2df_v2df_v2df;
20924 case V16QI_FTYPE_V16QI_V16QI_INT:
20925 type = v16qi_ftype_v16qi_v16qi_int;
20927 case V8HI_FTYPE_V8HI_V8HI_INT:
20928 type = v8hi_ftype_v8hi_v8hi_int;
20930 case V4SI_FTYPE_V4SI_V4SI_INT:
20931 type = v4si_ftype_v4si_v4si_int;
20933 case V4SF_FTYPE_V4SF_V4SF_INT:
20934 type = v4sf_ftype_v4sf_v4sf_int;
20936 case V2DI_FTYPE_V2DI_V2DI_INT:
20937 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20938 type = v2di_ftype_v2di_v2di_int;
20940 case V2DF_FTYPE_V2DF_V2DF_INT:
20941 type = v2df_ftype_v2df_v2df_int;
20943 case V2DI_FTYPE_V2DI_UINT_UINT:
20944 type = v2di_ftype_v2di_unsigned_unsigned;
20946 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20947 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20949 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20950 type = v1di_ftype_v1di_v1di_int;
20953 gcc_unreachable ();
20956 def_builtin_const (d->mask, d->name, type, d->code);
20959 /* pcmpestr[im] insns. */
20960 for (i = 0, d = bdesc_pcmpestr;
20961 i < ARRAY_SIZE (bdesc_pcmpestr);
20964 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20965 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20967 ftype = int_ftype_v16qi_int_v16qi_int_int;
20968 def_builtin_const (d->mask, d->name, ftype, d->code);
20971 /* pcmpistr[im] insns. */
20972 for (i = 0, d = bdesc_pcmpistr;
20973 i < ARRAY_SIZE (bdesc_pcmpistr);
20976 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20977 ftype = v16qi_ftype_v16qi_v16qi_int;
20979 ftype = int_ftype_v16qi_v16qi_int;
20980 def_builtin_const (d->mask, d->name, ftype, d->code);
20983 /* comi/ucomi insns. */
20984 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20985 if (d->mask == OPTION_MASK_ISA_SSE2)
20986 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20988 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20991 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20992 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20994 /* SSE or 3DNow!A */
20995 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20998 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
21000 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
21001 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
21004 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
21005 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
21008 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
21009 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
21010 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
21011 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
21012 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
21013 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
21016 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
21018 /* Access to the vec_init patterns. */
21019 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
21020 integer_type_node, NULL_TREE);
21021 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
21023 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
21024 short_integer_type_node,
21025 short_integer_type_node,
21026 short_integer_type_node, NULL_TREE);
21027 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
21029 ftype = build_function_type_list (V8QI_type_node, char_type_node,
21030 char_type_node, char_type_node,
21031 char_type_node, char_type_node,
21032 char_type_node, char_type_node,
21033 char_type_node, NULL_TREE);
21034 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
21036 /* Access to the vec_extract patterns. */
21037 ftype = build_function_type_list (double_type_node, V2DF_type_node,
21038 integer_type_node, NULL_TREE);
21039 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
21041 ftype = build_function_type_list (long_long_integer_type_node,
21042 V2DI_type_node, integer_type_node,
21044 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
21046 ftype = build_function_type_list (float_type_node, V4SF_type_node,
21047 integer_type_node, NULL_TREE);
21048 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
21050 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
21051 integer_type_node, NULL_TREE);
21052 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
21054 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
21055 integer_type_node, NULL_TREE);
21056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
21058 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
21059 integer_type_node, NULL_TREE);
21060 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
21062 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
21063 integer_type_node, NULL_TREE);
21064 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
21066 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
21067 integer_type_node, NULL_TREE);
21068 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
21070 /* Access to the vec_set patterns. */
21071 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
21073 integer_type_node, NULL_TREE);
21074 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
21076 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
21078 integer_type_node, NULL_TREE);
21079 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
21081 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
21083 integer_type_node, NULL_TREE);
21084 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
21086 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
21088 integer_type_node, NULL_TREE);
21089 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
21091 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
21093 integer_type_node, NULL_TREE);
21094 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
21096 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
21098 integer_type_node, NULL_TREE);
21099 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
21101 /* Add SSE5 multi-arg argument instructions */
21102 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21104 tree mtype = NULL_TREE;
21109 switch ((enum multi_arg_type)d->flag)
21111 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
21112 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
21113 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
21114 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
21115 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
21116 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
21117 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
21118 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
21119 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
21120 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
21121 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
21122 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
21123 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
21124 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
21125 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
21126 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
21127 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
21128 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
21129 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
21130 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
21131 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
21132 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
21133 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
21134 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
21135 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
21136 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
21137 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
21138 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
21139 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
21140 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
21141 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
21142 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
21143 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
21144 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
21145 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
21146 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
21147 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
21148 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
21149 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
21150 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
21151 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
21152 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
21153 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
21154 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
21155 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
21156 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
21157 case MULTI_ARG_UNKNOWN:
21159 gcc_unreachable ();
21163 def_builtin_const (d->mask, d->name, mtype, d->code);
21167 /* Internal method for ix86_init_builtins. */
21170 ix86_init_builtins_va_builtins_abi (void)
21172 tree ms_va_ref, sysv_va_ref;
21173 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
21174 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
21175 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
21176 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
21180 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
21181 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
21182 ms_va_ref = build_reference_type (ms_va_list_type_node);
21184 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
21187 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21188 fnvoid_va_start_ms =
21189 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
21190 fnvoid_va_end_sysv =
21191 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
21192 fnvoid_va_start_sysv =
21193 build_varargs_function_type_list (void_type_node, sysv_va_ref,
21195 fnvoid_va_copy_ms =
21196 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
21198 fnvoid_va_copy_sysv =
21199 build_function_type_list (void_type_node, sysv_va_ref,
21200 sysv_va_ref, NULL_TREE);
21202 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
21203 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
21204 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
21205 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
21206 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
21207 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
21208 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
21209 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21210 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
21211 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21212 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
21213 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
21217 ix86_init_builtins (void)
21219 tree float128_type_node = make_node (REAL_TYPE);
21222 /* The __float80 type. */
21223 if (TYPE_MODE (long_double_type_node) == XFmode)
21224 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
21228 /* The __float80 type. */
21229 tree float80_type_node = make_node (REAL_TYPE);
21231 TYPE_PRECISION (float80_type_node) = 80;
21232 layout_type (float80_type_node);
21233 (*lang_hooks.types.register_builtin_type) (float80_type_node,
21237 /* The __float128 type. */
21238 TYPE_PRECISION (float128_type_node) = 128;
21239 layout_type (float128_type_node);
21240 (*lang_hooks.types.register_builtin_type) (float128_type_node,
21243 /* TFmode support builtins. */
21244 ftype = build_function_type (float128_type_node, void_list_node);
21245 decl = add_builtin_function ("__builtin_infq", ftype,
21246 IX86_BUILTIN_INFQ, BUILT_IN_MD,
21248 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
21250 /* We will expand them to normal call if SSE2 isn't available since
21251 they are used by libgcc. */
21252 ftype = build_function_type_list (float128_type_node,
21253 float128_type_node,
21255 decl = add_builtin_function ("__builtin_fabsq", ftype,
21256 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
21257 "__fabstf2", NULL_TREE);
21258 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
21259 TREE_READONLY (decl) = 1;
21261 ftype = build_function_type_list (float128_type_node,
21262 float128_type_node,
21263 float128_type_node,
21265 decl = add_builtin_function ("__builtin_copysignq", ftype,
21266 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
21267 "__copysigntf3", NULL_TREE);
21268 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
21269 TREE_READONLY (decl) = 1;
21271 ix86_init_mmx_sse_builtins ();
21273 ix86_init_builtins_va_builtins_abi ();
21276 /* Errors in the source file can cause expand_expr to return const0_rtx
21277 where we expect a vector. To avoid crashing, use one of the vector
21278 clear instructions. */
21280 safe_vector_operand (rtx x, enum machine_mode mode)
21282 if (x == const0_rtx)
21283 x = CONST0_RTX (mode);
21287 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
21290 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
21293 tree arg0 = CALL_EXPR_ARG (exp, 0);
21294 tree arg1 = CALL_EXPR_ARG (exp, 1);
21295 rtx op0 = expand_normal (arg0);
21296 rtx op1 = expand_normal (arg1);
21297 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21298 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21299 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
21301 if (VECTOR_MODE_P (mode0))
21302 op0 = safe_vector_operand (op0, mode0);
21303 if (VECTOR_MODE_P (mode1))
21304 op1 = safe_vector_operand (op1, mode1);
21306 if (optimize || !target
21307 || GET_MODE (target) != tmode
21308 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21309 target = gen_reg_rtx (tmode);
21311 if (GET_MODE (op1) == SImode && mode1 == TImode)
21313 rtx x = gen_reg_rtx (V4SImode);
21314 emit_insn (gen_sse2_loadd (x, op1));
21315 op1 = gen_lowpart (TImode, x);
21318 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21319 op0 = copy_to_mode_reg (mode0, op0);
21320 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21321 op1 = copy_to_mode_reg (mode1, op1);
21323 pat = GEN_FCN (icode) (target, op0, op1);
21332 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
21335 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
21336 enum multi_arg_type m_type,
21337 enum insn_code sub_code)
21342 bool comparison_p = false;
21344 bool last_arg_constant = false;
21345 int num_memory = 0;
21348 enum machine_mode mode;
21351 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21355 case MULTI_ARG_3_SF:
21356 case MULTI_ARG_3_DF:
21357 case MULTI_ARG_3_DI:
21358 case MULTI_ARG_3_SI:
21359 case MULTI_ARG_3_SI_DI:
21360 case MULTI_ARG_3_HI:
21361 case MULTI_ARG_3_HI_SI:
21362 case MULTI_ARG_3_QI:
21363 case MULTI_ARG_3_PERMPS:
21364 case MULTI_ARG_3_PERMPD:
21368 case MULTI_ARG_2_SF:
21369 case MULTI_ARG_2_DF:
21370 case MULTI_ARG_2_DI:
21371 case MULTI_ARG_2_SI:
21372 case MULTI_ARG_2_HI:
21373 case MULTI_ARG_2_QI:
21377 case MULTI_ARG_2_DI_IMM:
21378 case MULTI_ARG_2_SI_IMM:
21379 case MULTI_ARG_2_HI_IMM:
21380 case MULTI_ARG_2_QI_IMM:
21382 last_arg_constant = true;
21385 case MULTI_ARG_1_SF:
21386 case MULTI_ARG_1_DF:
21387 case MULTI_ARG_1_DI:
21388 case MULTI_ARG_1_SI:
21389 case MULTI_ARG_1_HI:
21390 case MULTI_ARG_1_QI:
21391 case MULTI_ARG_1_SI_DI:
21392 case MULTI_ARG_1_HI_DI:
21393 case MULTI_ARG_1_HI_SI:
21394 case MULTI_ARG_1_QI_DI:
21395 case MULTI_ARG_1_QI_SI:
21396 case MULTI_ARG_1_QI_HI:
21397 case MULTI_ARG_1_PH2PS:
21398 case MULTI_ARG_1_PS2PH:
21402 case MULTI_ARG_2_SF_CMP:
21403 case MULTI_ARG_2_DF_CMP:
21404 case MULTI_ARG_2_DI_CMP:
21405 case MULTI_ARG_2_SI_CMP:
21406 case MULTI_ARG_2_HI_CMP:
21407 case MULTI_ARG_2_QI_CMP:
21409 comparison_p = true;
21412 case MULTI_ARG_2_SF_TF:
21413 case MULTI_ARG_2_DF_TF:
21414 case MULTI_ARG_2_DI_TF:
21415 case MULTI_ARG_2_SI_TF:
21416 case MULTI_ARG_2_HI_TF:
21417 case MULTI_ARG_2_QI_TF:
21422 case MULTI_ARG_UNKNOWN:
21424 gcc_unreachable ();
21427 if (optimize || !target
21428 || GET_MODE (target) != tmode
21429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21430 target = gen_reg_rtx (tmode);
21432 gcc_assert (nargs <= 4);
21434 for (i = 0; i < nargs; i++)
21436 tree arg = CALL_EXPR_ARG (exp, i);
21437 rtx op = expand_normal (arg);
21438 int adjust = (comparison_p) ? 1 : 0;
21439 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
21441 if (last_arg_constant && i == nargs-1)
21443 if (GET_CODE (op) != CONST_INT)
21445 error ("last argument must be an immediate");
21446 return gen_reg_rtx (tmode);
21451 if (VECTOR_MODE_P (mode))
21452 op = safe_vector_operand (op, mode);
21454 /* If we aren't optimizing, only allow one memory operand to be
21456 if (memory_operand (op, mode))
21459 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
21462 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
21464 op = force_reg (mode, op);
21468 args[i].mode = mode;
21474 pat = GEN_FCN (icode) (target, args[0].op);
21479 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
21480 GEN_INT ((int)sub_code));
21481 else if (! comparison_p)
21482 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21485 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
21489 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
21494 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
21498 gcc_unreachable ();
21508 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
21509 insns with vec_merge. */
21512 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
21516 tree arg0 = CALL_EXPR_ARG (exp, 0);
21517 rtx op1, op0 = expand_normal (arg0);
21518 enum machine_mode tmode = insn_data[icode].operand[0].mode;
21519 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
21521 if (optimize || !target
21522 || GET_MODE (target) != tmode
21523 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21524 target = gen_reg_rtx (tmode);
21526 if (VECTOR_MODE_P (mode0))
21527 op0 = safe_vector_operand (op0, mode0);
21529 if ((optimize && !register_operand (op0, mode0))
21530 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21531 op0 = copy_to_mode_reg (mode0, op0);
21534 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
21535 op1 = copy_to_mode_reg (mode0, op1);
21537 pat = GEN_FCN (icode) (target, op0, op1);
21544 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
21547 ix86_expand_sse_compare (const struct builtin_description *d,
21548 tree exp, rtx target, bool swap)
21551 tree arg0 = CALL_EXPR_ARG (exp, 0);
21552 tree arg1 = CALL_EXPR_ARG (exp, 1);
21553 rtx op0 = expand_normal (arg0);
21554 rtx op1 = expand_normal (arg1);
21556 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
21557 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
21558 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
21559 enum rtx_code comparison = d->comparison;
21561 if (VECTOR_MODE_P (mode0))
21562 op0 = safe_vector_operand (op0, mode0);
21563 if (VECTOR_MODE_P (mode1))
21564 op1 = safe_vector_operand (op1, mode1);
21566 /* Swap operands if we have a comparison that isn't available in
21570 rtx tmp = gen_reg_rtx (mode1);
21571 emit_move_insn (tmp, op1);
21576 if (optimize || !target
21577 || GET_MODE (target) != tmode
21578 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
21579 target = gen_reg_rtx (tmode);
21581 if ((optimize && !register_operand (op0, mode0))
21582 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
21583 op0 = copy_to_mode_reg (mode0, op0);
21584 if ((optimize && !register_operand (op1, mode1))
21585 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
21586 op1 = copy_to_mode_reg (mode1, op1);
21588 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
21589 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
21596 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
21599 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
21603 tree arg0 = CALL_EXPR_ARG (exp, 0);
21604 tree arg1 = CALL_EXPR_ARG (exp, 1);
21605 rtx op0 = expand_normal (arg0);
21606 rtx op1 = expand_normal (arg1);
21607 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21608 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21609 enum rtx_code comparison = d->comparison;
21611 if (VECTOR_MODE_P (mode0))
21612 op0 = safe_vector_operand (op0, mode0);
21613 if (VECTOR_MODE_P (mode1))
21614 op1 = safe_vector_operand (op1, mode1);
21616 /* Swap operands if we have a comparison that isn't available in
21618 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
21625 target = gen_reg_rtx (SImode);
21626 emit_move_insn (target, const0_rtx);
21627 target = gen_rtx_SUBREG (QImode, target, 0);
21629 if ((optimize && !register_operand (op0, mode0))
21630 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21631 op0 = copy_to_mode_reg (mode0, op0);
21632 if ((optimize && !register_operand (op1, mode1))
21633 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21634 op1 = copy_to_mode_reg (mode1, op1);
21636 pat = GEN_FCN (d->icode) (op0, op1);
21640 emit_insn (gen_rtx_SET (VOIDmode,
21641 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21642 gen_rtx_fmt_ee (comparison, QImode,
21646 return SUBREG_REG (target);
21649 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
21652 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
21656 tree arg0 = CALL_EXPR_ARG (exp, 0);
21657 tree arg1 = CALL_EXPR_ARG (exp, 1);
21658 rtx op0 = expand_normal (arg0);
21659 rtx op1 = expand_normal (arg1);
21660 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
21661 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
21662 enum rtx_code comparison = d->comparison;
21664 if (VECTOR_MODE_P (mode0))
21665 op0 = safe_vector_operand (op0, mode0);
21666 if (VECTOR_MODE_P (mode1))
21667 op1 = safe_vector_operand (op1, mode1);
21669 target = gen_reg_rtx (SImode);
21670 emit_move_insn (target, const0_rtx);
21671 target = gen_rtx_SUBREG (QImode, target, 0);
21673 if ((optimize && !register_operand (op0, mode0))
21674 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
21675 op0 = copy_to_mode_reg (mode0, op0);
21676 if ((optimize && !register_operand (op1, mode1))
21677 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
21678 op1 = copy_to_mode_reg (mode1, op1);
21680 pat = GEN_FCN (d->icode) (op0, op1);
21684 emit_insn (gen_rtx_SET (VOIDmode,
21685 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21686 gen_rtx_fmt_ee (comparison, QImode,
21690 return SUBREG_REG (target);
21693 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
21696 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
21697 tree exp, rtx target)
21700 tree arg0 = CALL_EXPR_ARG (exp, 0);
21701 tree arg1 = CALL_EXPR_ARG (exp, 1);
21702 tree arg2 = CALL_EXPR_ARG (exp, 2);
21703 tree arg3 = CALL_EXPR_ARG (exp, 3);
21704 tree arg4 = CALL_EXPR_ARG (exp, 4);
21705 rtx scratch0, scratch1;
21706 rtx op0 = expand_normal (arg0);
21707 rtx op1 = expand_normal (arg1);
21708 rtx op2 = expand_normal (arg2);
21709 rtx op3 = expand_normal (arg3);
21710 rtx op4 = expand_normal (arg4);
21711 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
21713 tmode0 = insn_data[d->icode].operand[0].mode;
21714 tmode1 = insn_data[d->icode].operand[1].mode;
21715 modev2 = insn_data[d->icode].operand[2].mode;
21716 modei3 = insn_data[d->icode].operand[3].mode;
21717 modev4 = insn_data[d->icode].operand[4].mode;
21718 modei5 = insn_data[d->icode].operand[5].mode;
21719 modeimm = insn_data[d->icode].operand[6].mode;
21721 if (VECTOR_MODE_P (modev2))
21722 op0 = safe_vector_operand (op0, modev2);
21723 if (VECTOR_MODE_P (modev4))
21724 op2 = safe_vector_operand (op2, modev4);
21726 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21727 op0 = copy_to_mode_reg (modev2, op0);
21728 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
21729 op1 = copy_to_mode_reg (modei3, op1);
21730 if ((optimize && !register_operand (op2, modev4))
21731 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
21732 op2 = copy_to_mode_reg (modev4, op2);
21733 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
21734 op3 = copy_to_mode_reg (modei5, op3);
21736 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
21738 error ("the fifth argument must be a 8-bit immediate");
21742 if (d->code == IX86_BUILTIN_PCMPESTRI128)
21744 if (optimize || !target
21745 || GET_MODE (target) != tmode0
21746 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21747 target = gen_reg_rtx (tmode0);
21749 scratch1 = gen_reg_rtx (tmode1);
21751 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
21753 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
21755 if (optimize || !target
21756 || GET_MODE (target) != tmode1
21757 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21758 target = gen_reg_rtx (tmode1);
21760 scratch0 = gen_reg_rtx (tmode0);
21762 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
21766 gcc_assert (d->flag);
21768 scratch0 = gen_reg_rtx (tmode0);
21769 scratch1 = gen_reg_rtx (tmode1);
21771 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
21781 target = gen_reg_rtx (SImode);
21782 emit_move_insn (target, const0_rtx);
21783 target = gen_rtx_SUBREG (QImode, target, 0);
21786 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21787 gen_rtx_fmt_ee (EQ, QImode,
21788 gen_rtx_REG ((enum machine_mode) d->flag,
21791 return SUBREG_REG (target);
21798 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
21801 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
21802 tree exp, rtx target)
21805 tree arg0 = CALL_EXPR_ARG (exp, 0);
21806 tree arg1 = CALL_EXPR_ARG (exp, 1);
21807 tree arg2 = CALL_EXPR_ARG (exp, 2);
21808 rtx scratch0, scratch1;
21809 rtx op0 = expand_normal (arg0);
21810 rtx op1 = expand_normal (arg1);
21811 rtx op2 = expand_normal (arg2);
21812 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
21814 tmode0 = insn_data[d->icode].operand[0].mode;
21815 tmode1 = insn_data[d->icode].operand[1].mode;
21816 modev2 = insn_data[d->icode].operand[2].mode;
21817 modev3 = insn_data[d->icode].operand[3].mode;
21818 modeimm = insn_data[d->icode].operand[4].mode;
21820 if (VECTOR_MODE_P (modev2))
21821 op0 = safe_vector_operand (op0, modev2);
21822 if (VECTOR_MODE_P (modev3))
21823 op1 = safe_vector_operand (op1, modev3);
21825 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
21826 op0 = copy_to_mode_reg (modev2, op0);
21827 if ((optimize && !register_operand (op1, modev3))
21828 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
21829 op1 = copy_to_mode_reg (modev3, op1);
21831 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
21833 error ("the third argument must be a 8-bit immediate");
21837 if (d->code == IX86_BUILTIN_PCMPISTRI128)
21839 if (optimize || !target
21840 || GET_MODE (target) != tmode0
21841 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
21842 target = gen_reg_rtx (tmode0);
21844 scratch1 = gen_reg_rtx (tmode1);
21846 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
21848 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
21850 if (optimize || !target
21851 || GET_MODE (target) != tmode1
21852 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
21853 target = gen_reg_rtx (tmode1);
21855 scratch0 = gen_reg_rtx (tmode0);
21857 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
21861 gcc_assert (d->flag);
21863 scratch0 = gen_reg_rtx (tmode0);
21864 scratch1 = gen_reg_rtx (tmode1);
21866 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
21876 target = gen_reg_rtx (SImode);
21877 emit_move_insn (target, const0_rtx);
21878 target = gen_rtx_SUBREG (QImode, target, 0);
21881 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21882 gen_rtx_fmt_ee (EQ, QImode,
21883 gen_rtx_REG ((enum machine_mode) d->flag,
21886 return SUBREG_REG (target);
21892 /* Subroutine of ix86_expand_builtin to take care of insns with
21893 variable number of operands. */
21896 ix86_expand_args_builtin (const struct builtin_description *d,
21897 tree exp, rtx target)
21899 rtx pat, real_target;
21900 unsigned int i, nargs;
21901 unsigned int nargs_constant = 0;
21902 int num_memory = 0;
21906 enum machine_mode mode;
21908 bool last_arg_count = false;
21909 enum insn_code icode = d->icode;
21910 const struct insn_data *insn_p = &insn_data[icode];
21911 enum machine_mode tmode = insn_p->operand[0].mode;
21912 enum machine_mode rmode = VOIDmode;
21914 enum rtx_code comparison = d->comparison;
21916 switch ((enum ix86_builtin_type) d->flag)
21918 case INT_FTYPE_V2DI_V2DI_PTEST:
21919 return ix86_expand_sse_ptest (d, exp, target);
21920 case FLOAT128_FTYPE_FLOAT128:
21921 case FLOAT_FTYPE_FLOAT:
21922 case INT64_FTYPE_V4SF:
21923 case INT64_FTYPE_V2DF:
21924 case INT_FTYPE_V16QI:
21925 case INT_FTYPE_V8QI:
21926 case INT_FTYPE_V4SF:
21927 case INT_FTYPE_V2DF:
21928 case V16QI_FTYPE_V16QI:
21929 case V8HI_FTYPE_V8HI:
21930 case V8HI_FTYPE_V16QI:
21931 case V8QI_FTYPE_V8QI:
21932 case V4SI_FTYPE_V4SI:
21933 case V4SI_FTYPE_V16QI:
21934 case V4SI_FTYPE_V4SF:
21935 case V4SI_FTYPE_V8HI:
21936 case V4SI_FTYPE_V2DF:
21937 case V4HI_FTYPE_V4HI:
21938 case V4SF_FTYPE_V4SF:
21939 case V4SF_FTYPE_V4SI:
21940 case V4SF_FTYPE_V2DF:
21941 case V2DI_FTYPE_V2DI:
21942 case V2DI_FTYPE_V16QI:
21943 case V2DI_FTYPE_V8HI:
21944 case V2DI_FTYPE_V4SI:
21945 case V2DF_FTYPE_V2DF:
21946 case V2DF_FTYPE_V4SI:
21947 case V2DF_FTYPE_V4SF:
21948 case V2DF_FTYPE_V2SI:
21949 case V2SI_FTYPE_V2SI:
21950 case V2SI_FTYPE_V4SF:
21951 case V2SI_FTYPE_V2SF:
21952 case V2SI_FTYPE_V2DF:
21953 case V2SF_FTYPE_V2SF:
21954 case V2SF_FTYPE_V2SI:
21957 case V4SF_FTYPE_V4SF_VEC_MERGE:
21958 case V2DF_FTYPE_V2DF_VEC_MERGE:
21959 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
21960 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
21961 case V16QI_FTYPE_V16QI_V16QI:
21962 case V16QI_FTYPE_V8HI_V8HI:
21963 case V8QI_FTYPE_V8QI_V8QI:
21964 case V8QI_FTYPE_V4HI_V4HI:
21965 case V8HI_FTYPE_V8HI_V8HI:
21966 case V8HI_FTYPE_V16QI_V16QI:
21967 case V8HI_FTYPE_V4SI_V4SI:
21968 case V4SI_FTYPE_V4SI_V4SI:
21969 case V4SI_FTYPE_V8HI_V8HI:
21970 case V4SI_FTYPE_V4SF_V4SF:
21971 case V4SI_FTYPE_V2DF_V2DF:
21972 case V4HI_FTYPE_V4HI_V4HI:
21973 case V4HI_FTYPE_V8QI_V8QI:
21974 case V4HI_FTYPE_V2SI_V2SI:
21975 case V4SF_FTYPE_V4SF_V4SF:
21976 case V4SF_FTYPE_V4SF_V2SI:
21977 case V4SF_FTYPE_V4SF_V2DF:
21978 case V4SF_FTYPE_V4SF_DI:
21979 case V4SF_FTYPE_V4SF_SI:
21980 case V2DI_FTYPE_V2DI_V2DI:
21981 case V2DI_FTYPE_V16QI_V16QI:
21982 case V2DI_FTYPE_V4SI_V4SI:
21983 case V2DI_FTYPE_V2DI_V16QI:
21984 case V2DI_FTYPE_V2DF_V2DF:
21985 case V2SI_FTYPE_V2SI_V2SI:
21986 case V2SI_FTYPE_V4HI_V4HI:
21987 case V2SI_FTYPE_V2SF_V2SF:
21988 case V2DF_FTYPE_V2DF_V2DF:
21989 case V2DF_FTYPE_V2DF_V4SF:
21990 case V2DF_FTYPE_V2DF_DI:
21991 case V2DF_FTYPE_V2DF_SI:
21992 case V2SF_FTYPE_V2SF_V2SF:
21993 case V1DI_FTYPE_V1DI_V1DI:
21994 case V1DI_FTYPE_V8QI_V8QI:
21995 case V1DI_FTYPE_V2SI_V2SI:
21996 if (comparison == UNKNOWN)
21997 return ix86_expand_binop_builtin (icode, exp, target);
22000 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22001 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22002 gcc_assert (comparison != UNKNOWN);
22006 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22007 case V8HI_FTYPE_V8HI_SI_COUNT:
22008 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22009 case V4SI_FTYPE_V4SI_SI_COUNT:
22010 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22011 case V4HI_FTYPE_V4HI_SI_COUNT:
22012 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22013 case V2DI_FTYPE_V2DI_SI_COUNT:
22014 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22015 case V2SI_FTYPE_V2SI_SI_COUNT:
22016 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22017 case V1DI_FTYPE_V1DI_SI_COUNT:
22019 last_arg_count = true;
22021 case UINT64_FTYPE_UINT64_UINT64:
22022 case UINT_FTYPE_UINT_UINT:
22023 case UINT_FTYPE_UINT_USHORT:
22024 case UINT_FTYPE_UINT_UCHAR:
22027 case V2DI2TI_FTYPE_V2DI_INT:
22030 nargs_constant = 1;
22032 case V8HI_FTYPE_V8HI_INT:
22033 case V4SI_FTYPE_V4SI_INT:
22034 case V4HI_FTYPE_V4HI_INT:
22035 case V4SF_FTYPE_V4SF_INT:
22036 case V2DI_FTYPE_V2DI_INT:
22037 case V2DF_FTYPE_V2DF_INT:
22039 nargs_constant = 1;
22041 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22042 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22043 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22046 case V16QI_FTYPE_V16QI_V16QI_INT:
22047 case V8HI_FTYPE_V8HI_V8HI_INT:
22048 case V4SI_FTYPE_V4SI_V4SI_INT:
22049 case V4SF_FTYPE_V4SF_V4SF_INT:
22050 case V2DI_FTYPE_V2DI_V2DI_INT:
22051 case V2DF_FTYPE_V2DF_V2DF_INT:
22053 nargs_constant = 1;
22055 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22058 nargs_constant = 1;
22060 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22063 nargs_constant = 1;
22065 case V2DI_FTYPE_V2DI_UINT_UINT:
22067 nargs_constant = 2;
22069 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22071 nargs_constant = 2;
22074 gcc_unreachable ();
22077 gcc_assert (nargs <= ARRAY_SIZE (args));
22079 if (comparison != UNKNOWN)
22081 gcc_assert (nargs == 2);
22082 return ix86_expand_sse_compare (d, exp, target, swap);
22085 if (rmode == VOIDmode || rmode == tmode)
22089 || GET_MODE (target) != tmode
22090 || ! (*insn_p->operand[0].predicate) (target, tmode))
22091 target = gen_reg_rtx (tmode);
22092 real_target = target;
22096 target = gen_reg_rtx (rmode);
22097 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
22100 for (i = 0; i < nargs; i++)
22102 tree arg = CALL_EXPR_ARG (exp, i);
22103 rtx op = expand_normal (arg);
22104 enum machine_mode mode = insn_p->operand[i + 1].mode;
22105 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
22107 if (last_arg_count && (i + 1) == nargs)
22109 /* SIMD shift insns take either an 8-bit immediate or
22110 register as count. But builtin functions take int as
22111 count. If count doesn't match, we put it in register. */
22114 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
22115 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
22116 op = copy_to_reg (op);
22119 else if ((nargs - i) <= nargs_constant)
22124 case CODE_FOR_sse4_1_roundpd:
22125 case CODE_FOR_sse4_1_roundps:
22126 case CODE_FOR_sse4_1_roundsd:
22127 case CODE_FOR_sse4_1_roundss:
22128 case CODE_FOR_sse4_1_blendps:
22129 error ("the last argument must be a 4-bit immediate");
22132 case CODE_FOR_sse4_1_blendpd:
22133 error ("the last argument must be a 2-bit immediate");
22137 switch (nargs_constant)
22140 if ((nargs - i) == nargs_constant)
22142 error ("the next to last argument must be an 8-bit immediate");
22146 error ("the last argument must be an 8-bit immediate");
22149 gcc_unreachable ();
22156 if (VECTOR_MODE_P (mode))
22157 op = safe_vector_operand (op, mode);
22159 /* If we aren't optimizing, only allow one memory operand to
22161 if (memory_operand (op, mode))
22164 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
22166 if (optimize || !match || num_memory > 1)
22167 op = copy_to_mode_reg (mode, op);
22171 op = copy_to_reg (op);
22172 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
22177 args[i].mode = mode;
22183 pat = GEN_FCN (icode) (real_target, args[0].op);
22186 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
22189 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22193 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
22194 args[2].op, args[3].op);
22197 gcc_unreachable ();
22207 /* Subroutine of ix86_expand_builtin to take care of special insns
22208 with variable number of operands. */
22211 ix86_expand_special_args_builtin (const struct builtin_description *d,
22212 tree exp, rtx target)
22216 unsigned int i, nargs, arg_adjust, memory;
22220 enum machine_mode mode;
22222 enum insn_code icode = d->icode;
22223 bool last_arg_constant = false;
22224 const struct insn_data *insn_p = &insn_data[icode];
22225 enum machine_mode tmode = insn_p->operand[0].mode;
22226 enum { load, store } class;
22228 switch ((enum ix86_special_builtin_type) d->flag)
22230 case VOID_FTYPE_VOID:
22231 emit_insn (GEN_FCN (icode) (target));
22233 case V2DI_FTYPE_PV2DI:
22234 case V16QI_FTYPE_PCCHAR:
22235 case V4SF_FTYPE_PCFLOAT:
22236 case V2DF_FTYPE_PCDOUBLE:
22241 case VOID_FTYPE_PV2SF_V4SF:
22242 case VOID_FTYPE_PV2DI_V2DI:
22243 case VOID_FTYPE_PCHAR_V16QI:
22244 case VOID_FTYPE_PFLOAT_V4SF:
22245 case VOID_FTYPE_PDOUBLE_V2DF:
22246 case VOID_FTYPE_PDI_DI:
22247 case VOID_FTYPE_PINT_INT:
22250 /* Reserve memory operand for target. */
22251 memory = ARRAY_SIZE (args);
22253 case V4SF_FTYPE_V4SF_PCV2SF:
22254 case V2DF_FTYPE_V2DF_PCDOUBLE:
22260 gcc_unreachable ();
22263 gcc_assert (nargs <= ARRAY_SIZE (args));
22265 if (class == store)
22267 arg = CALL_EXPR_ARG (exp, 0);
22268 op = expand_normal (arg);
22269 gcc_assert (target == 0);
22270 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
22278 || GET_MODE (target) != tmode
22279 || ! (*insn_p->operand[0].predicate) (target, tmode))
22280 target = gen_reg_rtx (tmode);
22283 for (i = 0; i < nargs; i++)
22285 enum machine_mode mode = insn_p->operand[i + 1].mode;
22288 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
22289 op = expand_normal (arg);
22290 match = (*insn_p->operand[i + 1].predicate) (op, mode);
22292 if (last_arg_constant && (i + 1) == nargs)
22298 error ("the last argument must be an 8-bit immediate");
22306 /* This must be the memory operand. */
22307 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
22308 gcc_assert (GET_MODE (op) == mode
22309 || GET_MODE (op) == VOIDmode);
22313 /* This must be register. */
22314 if (VECTOR_MODE_P (mode))
22315 op = safe_vector_operand (op, mode);
22317 gcc_assert (GET_MODE (op) == mode
22318 || GET_MODE (op) == VOIDmode);
22319 op = copy_to_mode_reg (mode, op);
22324 args[i].mode = mode;
22330 pat = GEN_FCN (icode) (target, args[0].op);
22333 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22336 gcc_unreachable ();
22342 return class == store ? 0 : target;
22345 /* Return the integer constant in ARG. Constrain it to be in the range
22346 of the subparts of VEC_TYPE; issue an error if not. */
22349 get_element_number (tree vec_type, tree arg)
22351 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
22353 if (!host_integerp (arg, 1)
22354 || (elt = tree_low_cst (arg, 1), elt > max))
22356 error ("selector must be an integer constant in the range 0..%wi", max);
22363 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22364 ix86_expand_vector_init. We DO have language-level syntax for this, in
22365 the form of (type){ init-list }. Except that since we can't place emms
22366 instructions from inside the compiler, we can't allow the use of MMX
22367 registers unless the user explicitly asks for it. So we do *not* define
22368 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
22369 we have builtins invoked by mmintrin.h that gives us license to emit
22370 these sorts of instructions. */
22373 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
22375 enum machine_mode tmode = TYPE_MODE (type);
22376 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
22377 int i, n_elt = GET_MODE_NUNITS (tmode);
22378 rtvec v = rtvec_alloc (n_elt);
22380 gcc_assert (VECTOR_MODE_P (tmode));
22381 gcc_assert (call_expr_nargs (exp) == n_elt);
22383 for (i = 0; i < n_elt; ++i)
22385 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
22386 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
22389 if (!target || !register_operand (target, tmode))
22390 target = gen_reg_rtx (tmode);
22392 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
22396 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22397 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
22398 had a language-level syntax for referencing vector elements. */
22401 ix86_expand_vec_ext_builtin (tree exp, rtx target)
22403 enum machine_mode tmode, mode0;
22408 arg0 = CALL_EXPR_ARG (exp, 0);
22409 arg1 = CALL_EXPR_ARG (exp, 1);
22411 op0 = expand_normal (arg0);
22412 elt = get_element_number (TREE_TYPE (arg0), arg1);
22414 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22415 mode0 = TYPE_MODE (TREE_TYPE (arg0));
22416 gcc_assert (VECTOR_MODE_P (mode0));
22418 op0 = force_reg (mode0, op0);
22420 if (optimize || !target || !register_operand (target, tmode))
22421 target = gen_reg_rtx (tmode);
22423 ix86_expand_vector_extract (true, target, op0, elt);
22428 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
22429 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
22430 a language-level syntax for referencing vector elements. */
22433 ix86_expand_vec_set_builtin (tree exp)
22435 enum machine_mode tmode, mode1;
22436 tree arg0, arg1, arg2;
22438 rtx op0, op1, target;
22440 arg0 = CALL_EXPR_ARG (exp, 0);
22441 arg1 = CALL_EXPR_ARG (exp, 1);
22442 arg2 = CALL_EXPR_ARG (exp, 2);
22444 tmode = TYPE_MODE (TREE_TYPE (arg0));
22445 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
22446 gcc_assert (VECTOR_MODE_P (tmode));
22448 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
22449 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
22450 elt = get_element_number (TREE_TYPE (arg0), arg2);
22452 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
22453 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
22455 op0 = force_reg (tmode, op0);
22456 op1 = force_reg (mode1, op1);
22458 /* OP0 is the source of these builtin functions and shouldn't be
22459 modified. Create a copy, use it and return it as target. */
22460 target = gen_reg_rtx (tmode);
22461 emit_move_insn (target, op0);
22462 ix86_expand_vector_set (true, target, op1, elt);
22467 /* Expand an expression EXP that calls a built-in function,
22468 with result going to TARGET if that's convenient
22469 (and in mode MODE if that's convenient).
22470 SUBTARGET may be used as the target for computing one of EXP's operands.
22471 IGNORE is nonzero if the value is to be ignored. */
22474 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
22475 enum machine_mode mode ATTRIBUTE_UNUSED,
22476 int ignore ATTRIBUTE_UNUSED)
22478 const struct builtin_description *d;
22480 enum insn_code icode;
22481 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22482 tree arg0, arg1, arg2;
22483 rtx op0, op1, op2, pat;
22484 enum machine_mode mode0, mode1, mode2;
22485 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22487 /* Determine whether the builtin function is available under the current ISA.
22488 Originally the builtin was not created if it wasn't applicable to the
22489 current ISA based on the command line switches. With function specific
22490 options, we need to check in the context of the function making the call
22491 whether it is supported. */
22492 if (ix86_builtins_isa[fcode]
22493 && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
22495 char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
22496 NULL, NULL, false);
22499 error ("%qE needs unknown isa option", fndecl);
22502 gcc_assert (opts != NULL);
22503 error ("%qE needs isa option %s", fndecl, opts);
22511 case IX86_BUILTIN_MASKMOVQ:
22512 case IX86_BUILTIN_MASKMOVDQU:
22513 icode = (fcode == IX86_BUILTIN_MASKMOVQ
22514 ? CODE_FOR_mmx_maskmovq
22515 : CODE_FOR_sse2_maskmovdqu);
22516 /* Note the arg order is different from the operand order. */
22517 arg1 = CALL_EXPR_ARG (exp, 0);
22518 arg2 = CALL_EXPR_ARG (exp, 1);
22519 arg0 = CALL_EXPR_ARG (exp, 2);
22520 op0 = expand_normal (arg0);
22521 op1 = expand_normal (arg1);
22522 op2 = expand_normal (arg2);
22523 mode0 = insn_data[icode].operand[0].mode;
22524 mode1 = insn_data[icode].operand[1].mode;
22525 mode2 = insn_data[icode].operand[2].mode;
22527 op0 = force_reg (Pmode, op0);
22528 op0 = gen_rtx_MEM (mode1, op0);
22530 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
22531 op0 = copy_to_mode_reg (mode0, op0);
22532 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
22533 op1 = copy_to_mode_reg (mode1, op1);
22534 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
22535 op2 = copy_to_mode_reg (mode2, op2);
22536 pat = GEN_FCN (icode) (op0, op1, op2);
22542 case IX86_BUILTIN_LDMXCSR:
22543 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
22544 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22545 emit_move_insn (target, op0);
22546 emit_insn (gen_sse_ldmxcsr (target));
22549 case IX86_BUILTIN_STMXCSR:
22550 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
22551 emit_insn (gen_sse_stmxcsr (target));
22552 return copy_to_mode_reg (SImode, target);
22554 case IX86_BUILTIN_CLFLUSH:
22555 arg0 = CALL_EXPR_ARG (exp, 0);
22556 op0 = expand_normal (arg0);
22557 icode = CODE_FOR_sse2_clflush;
22558 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
22559 op0 = copy_to_mode_reg (Pmode, op0);
22561 emit_insn (gen_sse2_clflush (op0));
22564 case IX86_BUILTIN_MONITOR:
22565 arg0 = CALL_EXPR_ARG (exp, 0);
22566 arg1 = CALL_EXPR_ARG (exp, 1);
22567 arg2 = CALL_EXPR_ARG (exp, 2);
22568 op0 = expand_normal (arg0);
22569 op1 = expand_normal (arg1);
22570 op2 = expand_normal (arg2);
22572 op0 = copy_to_mode_reg (Pmode, op0);
22574 op1 = copy_to_mode_reg (SImode, op1);
22576 op2 = copy_to_mode_reg (SImode, op2);
22577 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
22580 case IX86_BUILTIN_MWAIT:
22581 arg0 = CALL_EXPR_ARG (exp, 0);
22582 arg1 = CALL_EXPR_ARG (exp, 1);
22583 op0 = expand_normal (arg0);
22584 op1 = expand_normal (arg1);
22586 op0 = copy_to_mode_reg (SImode, op0);
22588 op1 = copy_to_mode_reg (SImode, op1);
22589 emit_insn (gen_sse3_mwait (op0, op1));
22592 case IX86_BUILTIN_VEC_INIT_V2SI:
22593 case IX86_BUILTIN_VEC_INIT_V4HI:
22594 case IX86_BUILTIN_VEC_INIT_V8QI:
22595 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
22597 case IX86_BUILTIN_VEC_EXT_V2DF:
22598 case IX86_BUILTIN_VEC_EXT_V2DI:
22599 case IX86_BUILTIN_VEC_EXT_V4SF:
22600 case IX86_BUILTIN_VEC_EXT_V4SI:
22601 case IX86_BUILTIN_VEC_EXT_V8HI:
22602 case IX86_BUILTIN_VEC_EXT_V2SI:
22603 case IX86_BUILTIN_VEC_EXT_V4HI:
22604 case IX86_BUILTIN_VEC_EXT_V16QI:
22605 return ix86_expand_vec_ext_builtin (exp, target);
22607 case IX86_BUILTIN_VEC_SET_V2DI:
22608 case IX86_BUILTIN_VEC_SET_V4SF:
22609 case IX86_BUILTIN_VEC_SET_V4SI:
22610 case IX86_BUILTIN_VEC_SET_V8HI:
22611 case IX86_BUILTIN_VEC_SET_V4HI:
22612 case IX86_BUILTIN_VEC_SET_V16QI:
22613 return ix86_expand_vec_set_builtin (exp);
22615 case IX86_BUILTIN_INFQ:
22617 REAL_VALUE_TYPE inf;
22621 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
22623 tmp = validize_mem (force_const_mem (mode, tmp));
22626 target = gen_reg_rtx (mode);
22628 emit_move_insn (target, tmp);
22636 for (i = 0, d = bdesc_special_args;
22637 i < ARRAY_SIZE (bdesc_special_args);
22639 if (d->code == fcode)
22640 return ix86_expand_special_args_builtin (d, exp, target);
22642 for (i = 0, d = bdesc_args;
22643 i < ARRAY_SIZE (bdesc_args);
22645 if (d->code == fcode)
22648 case IX86_BUILTIN_FABSQ:
22649 case IX86_BUILTIN_COPYSIGNQ:
22651 /* Emit a normal call if SSE2 isn't available. */
22652 return expand_call (exp, target, ignore);
22654 return ix86_expand_args_builtin (d, exp, target);
22657 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22658 if (d->code == fcode)
22659 return ix86_expand_sse_comi (d, exp, target);
22661 for (i = 0, d = bdesc_pcmpestr;
22662 i < ARRAY_SIZE (bdesc_pcmpestr);
22664 if (d->code == fcode)
22665 return ix86_expand_sse_pcmpestr (d, exp, target);
22667 for (i = 0, d = bdesc_pcmpistr;
22668 i < ARRAY_SIZE (bdesc_pcmpistr);
22670 if (d->code == fcode)
22671 return ix86_expand_sse_pcmpistr (d, exp, target);
22673 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22674 if (d->code == fcode)
22675 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
22676 (enum multi_arg_type)d->flag,
22679 gcc_unreachable ();
22682 /* Returns a function decl for a vectorized version of the builtin function
22683 with builtin function code FN and the result vector type TYPE, or NULL_TREE
22684 if it is not available. */
22687 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
22690 enum machine_mode in_mode, out_mode;
22693 if (TREE_CODE (type_out) != VECTOR_TYPE
22694 || TREE_CODE (type_in) != VECTOR_TYPE)
22697 out_mode = TYPE_MODE (TREE_TYPE (type_out));
22698 out_n = TYPE_VECTOR_SUBPARTS (type_out);
22699 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22700 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22704 case BUILT_IN_SQRT:
22705 if (out_mode == DFmode && out_n == 2
22706 && in_mode == DFmode && in_n == 2)
22707 return ix86_builtins[IX86_BUILTIN_SQRTPD];
22710 case BUILT_IN_SQRTF:
22711 if (out_mode == SFmode && out_n == 4
22712 && in_mode == SFmode && in_n == 4)
22713 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
22716 case BUILT_IN_LRINT:
22717 if (out_mode == SImode && out_n == 4
22718 && in_mode == DFmode && in_n == 2)
22719 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
22722 case BUILT_IN_LRINTF:
22723 if (out_mode == SImode && out_n == 4
22724 && in_mode == SFmode && in_n == 4)
22725 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
22732 /* Dispatch to a handler for a vectorization library. */
22733 if (ix86_veclib_handler)
22734 return (*ix86_veclib_handler)(fn, type_out, type_in);
22739 /* Handler for an SVML-style interface to
22740 a library with vectorized intrinsics. */
22743 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
22746 tree fntype, new_fndecl, args;
22749 enum machine_mode el_mode, in_mode;
22752 /* The SVML is suitable for unsafe math only. */
22753 if (!flag_unsafe_math_optimizations)
22756 el_mode = TYPE_MODE (TREE_TYPE (type_out));
22757 n = TYPE_VECTOR_SUBPARTS (type_out);
22758 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22759 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22760 if (el_mode != in_mode
22768 case BUILT_IN_LOG10:
22770 case BUILT_IN_TANH:
22772 case BUILT_IN_ATAN:
22773 case BUILT_IN_ATAN2:
22774 case BUILT_IN_ATANH:
22775 case BUILT_IN_CBRT:
22776 case BUILT_IN_SINH:
22778 case BUILT_IN_ASINH:
22779 case BUILT_IN_ASIN:
22780 case BUILT_IN_COSH:
22782 case BUILT_IN_ACOSH:
22783 case BUILT_IN_ACOS:
22784 if (el_mode != DFmode || n != 2)
22788 case BUILT_IN_EXPF:
22789 case BUILT_IN_LOGF:
22790 case BUILT_IN_LOG10F:
22791 case BUILT_IN_POWF:
22792 case BUILT_IN_TANHF:
22793 case BUILT_IN_TANF:
22794 case BUILT_IN_ATANF:
22795 case BUILT_IN_ATAN2F:
22796 case BUILT_IN_ATANHF:
22797 case BUILT_IN_CBRTF:
22798 case BUILT_IN_SINHF:
22799 case BUILT_IN_SINF:
22800 case BUILT_IN_ASINHF:
22801 case BUILT_IN_ASINF:
22802 case BUILT_IN_COSHF:
22803 case BUILT_IN_COSF:
22804 case BUILT_IN_ACOSHF:
22805 case BUILT_IN_ACOSF:
22806 if (el_mode != SFmode || n != 4)
22814 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
22816 if (fn == BUILT_IN_LOGF)
22817 strcpy (name, "vmlsLn4");
22818 else if (fn == BUILT_IN_LOG)
22819 strcpy (name, "vmldLn2");
22822 sprintf (name, "vmls%s", bname+10);
22823 name[strlen (name)-1] = '4';
22826 sprintf (name, "vmld%s2", bname+10);
22828 /* Convert to uppercase. */
22832 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
22833 args = TREE_CHAIN (args))
22837 fntype = build_function_type_list (type_out, type_in, NULL);
22839 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
22841 /* Build a function declaration for the vectorized function. */
22842 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
22843 TREE_PUBLIC (new_fndecl) = 1;
22844 DECL_EXTERNAL (new_fndecl) = 1;
22845 DECL_IS_NOVOPS (new_fndecl) = 1;
22846 TREE_READONLY (new_fndecl) = 1;
22851 /* Handler for an ACML-style interface to
22852 a library with vectorized intrinsics. */
22855 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
22857 char name[20] = "__vr.._";
22858 tree fntype, new_fndecl, args;
22861 enum machine_mode el_mode, in_mode;
22864 /* The ACML is 64bits only and suitable for unsafe math only as
22865 it does not correctly support parts of IEEE with the required
22866 precision such as denormals. */
22868 || !flag_unsafe_math_optimizations)
22871 el_mode = TYPE_MODE (TREE_TYPE (type_out));
22872 n = TYPE_VECTOR_SUBPARTS (type_out);
22873 in_mode = TYPE_MODE (TREE_TYPE (type_in));
22874 in_n = TYPE_VECTOR_SUBPARTS (type_in);
22875 if (el_mode != in_mode
22885 case BUILT_IN_LOG2:
22886 case BUILT_IN_LOG10:
22889 if (el_mode != DFmode
22894 case BUILT_IN_SINF:
22895 case BUILT_IN_COSF:
22896 case BUILT_IN_EXPF:
22897 case BUILT_IN_POWF:
22898 case BUILT_IN_LOGF:
22899 case BUILT_IN_LOG2F:
22900 case BUILT_IN_LOG10F:
22903 if (el_mode != SFmode
22912 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
22913 sprintf (name + 7, "%s", bname+10);
22916 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
22917 args = TREE_CHAIN (args))
22921 fntype = build_function_type_list (type_out, type_in, NULL);
22923 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
22925 /* Build a function declaration for the vectorized function. */
22926 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
22927 TREE_PUBLIC (new_fndecl) = 1;
22928 DECL_EXTERNAL (new_fndecl) = 1;
22929 DECL_IS_NOVOPS (new_fndecl) = 1;
22930 TREE_READONLY (new_fndecl) = 1;
22936 /* Returns a decl of a function that implements conversion of the
22937 input vector of type TYPE, or NULL_TREE if it is not available. */
22940 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
22942 if (TREE_CODE (type) != VECTOR_TYPE)
22948 switch (TYPE_MODE (type))
22951 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
22956 case FIX_TRUNC_EXPR:
22957 switch (TYPE_MODE (type))
22960 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
22970 /* Returns a code for a target-specific builtin that implements
22971 reciprocal of the function, or NULL_TREE if not available. */
22974 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
22975 bool sqrt ATTRIBUTE_UNUSED)
22977 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
22978 && flag_finite_math_only && !flag_trapping_math
22979 && flag_unsafe_math_optimizations))
22983 /* Machine dependent builtins. */
22986 /* Vectorized version of sqrt to rsqrt conversion. */
22987 case IX86_BUILTIN_SQRTPS_NR:
22988 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
22994 /* Normal builtins. */
22997 /* Sqrt to rsqrt conversion. */
22998 case BUILT_IN_SQRTF:
22999 return ix86_builtins[IX86_BUILTIN_RSQRTF];
23006 /* Store OPERAND to the memory after reload is completed. This means
23007 that we can't easily use assign_stack_local. */
23009 ix86_force_to_memory (enum machine_mode mode, rtx operand)
23013 gcc_assert (reload_completed);
23014 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
23016 result = gen_rtx_MEM (mode,
23017 gen_rtx_PLUS (Pmode,
23019 GEN_INT (-RED_ZONE_SIZE)));
23020 emit_move_insn (result, operand);
23022 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
23028 operand = gen_lowpart (DImode, operand);
23032 gen_rtx_SET (VOIDmode,
23033 gen_rtx_MEM (DImode,
23034 gen_rtx_PRE_DEC (DImode,
23035 stack_pointer_rtx)),
23039 gcc_unreachable ();
23041 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23050 split_di (&operand, 1, operands, operands + 1);
23052 gen_rtx_SET (VOIDmode,
23053 gen_rtx_MEM (SImode,
23054 gen_rtx_PRE_DEC (Pmode,
23055 stack_pointer_rtx)),
23058 gen_rtx_SET (VOIDmode,
23059 gen_rtx_MEM (SImode,
23060 gen_rtx_PRE_DEC (Pmode,
23061 stack_pointer_rtx)),
23066 /* Store HImodes as SImodes. */
23067 operand = gen_lowpart (SImode, operand);
23071 gen_rtx_SET (VOIDmode,
23072 gen_rtx_MEM (GET_MODE (operand),
23073 gen_rtx_PRE_DEC (SImode,
23074 stack_pointer_rtx)),
23078 gcc_unreachable ();
23080 result = gen_rtx_MEM (mode, stack_pointer_rtx);
23085 /* Free operand from the memory. */
23087 ix86_free_from_memory (enum machine_mode mode)
23089 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
23093 if (mode == DImode || TARGET_64BIT)
23097 /* Use LEA to deallocate stack space. In peephole2 it will be converted
23098 to pop or add instruction if registers are available. */
23099 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
23100 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
23105 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
23106 QImode must go into class Q_REGS.
23107 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
23108 movdf to do mem-to-mem moves through integer regs. */
23110 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
23112 enum machine_mode mode = GET_MODE (x);
23114 /* We're only allowed to return a subclass of CLASS. Many of the
23115 following checks fail for NO_REGS, so eliminate that early. */
23116 if (regclass == NO_REGS)
23119 /* All classes can load zeros. */
23120 if (x == CONST0_RTX (mode))
23123 /* Force constants into memory if we are loading a (nonzero) constant into
23124 an MMX or SSE register. This is because there are no MMX/SSE instructions
23125 to load from a constant. */
23127 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
23130 /* Prefer SSE regs only, if we can use them for math. */
23131 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
23132 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
23134 /* Floating-point constants need more complex checks. */
23135 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
23137 /* General regs can load everything. */
23138 if (reg_class_subset_p (regclass, GENERAL_REGS))
23141 /* Floats can load 0 and 1 plus some others. Note that we eliminated
23142 zero above. We only want to wind up preferring 80387 registers if
23143 we plan on doing computation with them. */
23145 && standard_80387_constant_p (x))
23147 /* Limit class to non-sse. */
23148 if (regclass == FLOAT_SSE_REGS)
23150 if (regclass == FP_TOP_SSE_REGS)
23152 if (regclass == FP_SECOND_SSE_REGS)
23153 return FP_SECOND_REG;
23154 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
23161 /* Generally when we see PLUS here, it's the function invariant
23162 (plus soft-fp const_int). Which can only be computed into general
23164 if (GET_CODE (x) == PLUS)
23165 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
23167 /* QImode constants are easy to load, but non-constant QImode data
23168 must go into Q_REGS. */
23169 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
23171 if (reg_class_subset_p (regclass, Q_REGS))
23173 if (reg_class_subset_p (Q_REGS, regclass))
23181 /* Discourage putting floating-point values in SSE registers unless
23182 SSE math is being used, and likewise for the 387 registers. */
23184 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
23186 enum machine_mode mode = GET_MODE (x);
23188 /* Restrict the output reload class to the register bank that we are doing
23189 math on. If we would like not to return a subset of CLASS, reject this
23190 alternative: if reload cannot do this, it will still use its choice. */
23191 mode = GET_MODE (x);
23192 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
23193 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
23195 if (X87_FLOAT_MODE_P (mode))
23197 if (regclass == FP_TOP_SSE_REGS)
23199 else if (regclass == FP_SECOND_SSE_REGS)
23200 return FP_SECOND_REG;
23202 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
23208 static enum reg_class
23209 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
23210 enum machine_mode mode,
23211 secondary_reload_info *sri ATTRIBUTE_UNUSED)
23213 /* QImode spills from non-QI registers require
23214 intermediate register on 32bit targets. */
23215 if (!in_p && mode == QImode && !TARGET_64BIT
23216 && (class == GENERAL_REGS
23217 || class == LEGACY_REGS
23218 || class == INDEX_REGS))
23227 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
23228 regno = true_regnum (x);
23230 /* Return Q_REGS if the operand is in memory. */
23238 /* If we are copying between general and FP registers, we need a memory
23239 location. The same is true for SSE and MMX registers.
23241 To optimize register_move_cost performance, allow inline variant.
23243 The macro can't work reliably when one of the CLASSES is class containing
23244 registers from multiple units (SSE, MMX, integer). We avoid this by never
23245 combining those units in single alternative in the machine description.
23246 Ensure that this constraint holds to avoid unexpected surprises.
23248 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
23249 enforce these sanity checks. */
23252 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23253 enum machine_mode mode, int strict)
23255 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
23256 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
23257 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
23258 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
23259 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
23260 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
23262 gcc_assert (!strict);
23266 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
23269 /* ??? This is a lie. We do have moves between mmx/general, and for
23270 mmx/sse2. But by saying we need secondary memory we discourage the
23271 register allocator from using the mmx registers unless needed. */
23272 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
23275 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23277 /* SSE1 doesn't have any direct moves from other classes. */
23281 /* If the target says that inter-unit moves are more expensive
23282 than moving through memory, then don't generate them. */
23283 if (!TARGET_INTER_UNIT_MOVES)
23286 /* Between SSE and general, we have moves no larger than word size. */
23287 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
23295 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
23296 enum machine_mode mode, int strict)
23298 return inline_secondary_memory_needed (class1, class2, mode, strict);
23301 /* Return true if the registers in CLASS cannot represent the change from
23302 modes FROM to TO. */
23305 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
23306 enum reg_class regclass)
23311 /* x87 registers can't do subreg at all, as all values are reformatted
23312 to extended precision. */
23313 if (MAYBE_FLOAT_CLASS_P (regclass))
23316 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
23318 /* Vector registers do not support QI or HImode loads. If we don't
23319 disallow a change to these modes, reload will assume it's ok to
23320 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
23321 the vec_dupv4hi pattern. */
23322 if (GET_MODE_SIZE (from) < 4)
23325 /* Vector registers do not support subreg with nonzero offsets, which
23326 are otherwise valid for integer registers. Since we can't see
23327 whether we have a nonzero offset from here, prohibit all
23328 nonparadoxical subregs changing size. */
23329 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
23336 /* Return the cost of moving data of mode M between a
23337 register and memory. A value of 2 is the default; this cost is
23338 relative to those in `REGISTER_MOVE_COST'.
23340 This function is used extensively by register_move_cost that is used to
23341 build tables at startup. Make it inline in this case.
23342 When IN is 2, return maximum of in and out move cost.
23344 If moving between registers and memory is more expensive than
23345 between two registers, you should define this macro to express the
23348 Model also increased moving costs of QImode registers in non
23352 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
23356 if (FLOAT_CLASS_P (regclass))
23374 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
23375 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
23377 if (SSE_CLASS_P (regclass))
23380 switch (GET_MODE_SIZE (mode))
23395 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
23396 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
23398 if (MMX_CLASS_P (regclass))
23401 switch (GET_MODE_SIZE (mode))
23413 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
23414 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
23416 switch (GET_MODE_SIZE (mode))
23419 if (Q_CLASS_P (regclass) || TARGET_64BIT)
23422 return ix86_cost->int_store[0];
23423 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
23424 cost = ix86_cost->movzbl_load;
23426 cost = ix86_cost->int_load[0];
23428 return MAX (cost, ix86_cost->int_store[0]);
23434 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
23436 return ix86_cost->movzbl_load;
23438 return ix86_cost->int_store[0] + 4;
23443 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
23444 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
23446 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
23447 if (mode == TFmode)
23450 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
23452 cost = ix86_cost->int_load[2];
23454 cost = ix86_cost->int_store[2];
23455 return (cost * (((int) GET_MODE_SIZE (mode)
23456 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
23461 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
23463 return inline_memory_move_cost (mode, regclass, in);
23467 /* Return the cost of moving data from a register in class CLASS1 to
23468 one in class CLASS2.
23470 It is not required that the cost always equal 2 when FROM is the same as TO;
23471 on some machines it is expensive to move between registers if they are not
23472 general registers. */
23475 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
23476 enum reg_class class2)
23478 /* In case we require secondary memory, compute cost of the store followed
23479 by load. In order to avoid bad register allocation choices, we need
23480 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
23482 if (inline_secondary_memory_needed (class1, class2, mode, 0))
23486 cost += inline_memory_move_cost (mode, class1, 2);
23487 cost += inline_memory_move_cost (mode, class2, 2);
23489 /* In case of copying from general_purpose_register we may emit multiple
23490 stores followed by single load causing memory size mismatch stall.
23491 Count this as arbitrarily high cost of 20. */
23492 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
23495 /* In the case of FP/MMX moves, the registers actually overlap, and we
23496 have to switch modes in order to treat them differently. */
23497 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
23498 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
23504 /* Moves between SSE/MMX and integer unit are expensive. */
23505 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
23506 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
23508 /* ??? By keeping returned value relatively high, we limit the number
23509 of moves between integer and MMX/SSE registers for all targets.
23510 Additionally, high value prevents problem with x86_modes_tieable_p(),
23511 where integer modes in MMX/SSE registers are not tieable
23512 because of missing QImode and HImode moves to, from or between
23513 MMX/SSE registers. */
23514 return MAX (8, ix86_cost->mmxsse_to_integer);
23516 if (MAYBE_FLOAT_CLASS_P (class1))
23517 return ix86_cost->fp_move;
23518 if (MAYBE_SSE_CLASS_P (class1))
23519 return ix86_cost->sse_move;
23520 if (MAYBE_MMX_CLASS_P (class1))
23521 return ix86_cost->mmx_move;
23525 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
23528 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
23530 /* Flags and only flags can only hold CCmode values. */
23531 if (CC_REGNO_P (regno))
23532 return GET_MODE_CLASS (mode) == MODE_CC;
23533 if (GET_MODE_CLASS (mode) == MODE_CC
23534 || GET_MODE_CLASS (mode) == MODE_RANDOM
23535 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
23537 if (FP_REGNO_P (regno))
23538 return VALID_FP_MODE_P (mode);
23539 if (SSE_REGNO_P (regno))
23541 /* We implement the move patterns for all vector modes into and
23542 out of SSE registers, even when no operation instructions
23544 return (VALID_SSE_REG_MODE (mode)
23545 || VALID_SSE2_REG_MODE (mode)
23546 || VALID_MMX_REG_MODE (mode)
23547 || VALID_MMX_REG_MODE_3DNOW (mode));
23549 if (MMX_REGNO_P (regno))
23551 /* We implement the move patterns for 3DNOW modes even in MMX mode,
23552 so if the register is available at all, then we can move data of
23553 the given mode into or out of it. */
23554 return (VALID_MMX_REG_MODE (mode)
23555 || VALID_MMX_REG_MODE_3DNOW (mode));
23558 if (mode == QImode)
23560 /* Take care for QImode values - they can be in non-QI regs,
23561 but then they do cause partial register stalls. */
23562 if (regno < 4 || TARGET_64BIT)
23564 if (!TARGET_PARTIAL_REG_STALL)
23566 return reload_in_progress || reload_completed;
23568 /* We handle both integer and floats in the general purpose registers. */
23569 else if (VALID_INT_MODE_P (mode))
23571 else if (VALID_FP_MODE_P (mode))
23573 else if (VALID_DFP_MODE_P (mode))
23575 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
23576 on to use that value in smaller contexts, this can easily force a
23577 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
23578 supporting DImode, allow it. */
23579 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
23585 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
23586 tieable integer mode. */
23589 ix86_tieable_integer_mode_p (enum machine_mode mode)
23598 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
23601 return TARGET_64BIT;
23608 /* Return true if MODE1 is accessible in a register that can hold MODE2
23609 without copying. That is, all register classes that can hold MODE2
23610 can also hold MODE1. */
23613 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
23615 if (mode1 == mode2)
23618 if (ix86_tieable_integer_mode_p (mode1)
23619 && ix86_tieable_integer_mode_p (mode2))
23622 /* MODE2 being XFmode implies fp stack or general regs, which means we
23623 can tie any smaller floating point modes to it. Note that we do not
23624 tie this with TFmode. */
23625 if (mode2 == XFmode)
23626 return mode1 == SFmode || mode1 == DFmode;
23628 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
23629 that we can tie it with SFmode. */
23630 if (mode2 == DFmode)
23631 return mode1 == SFmode;
23633 /* If MODE2 is only appropriate for an SSE register, then tie with
23634 any other mode acceptable to SSE registers. */
23635 if (GET_MODE_SIZE (mode2) == 16
23636 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
23637 return (GET_MODE_SIZE (mode1) == 16
23638 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
23640 /* If MODE2 is appropriate for an MMX register, then tie
23641 with any other mode acceptable to MMX registers. */
23642 if (GET_MODE_SIZE (mode2) == 8
23643 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
23644 return (GET_MODE_SIZE (mode1) == 8
23645 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
23650 /* Compute a (partial) cost for rtx X. Return true if the complete
23651 cost has been computed, and false if subexpressions should be
23652 scanned. In either case, *TOTAL contains the cost result. */
23655 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
23657 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
23658 enum machine_mode mode = GET_MODE (x);
23666 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
23668 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
23670 else if (flag_pic && SYMBOLIC_CONST (x)
23672 || (!GET_CODE (x) != LABEL_REF
23673 && (GET_CODE (x) != SYMBOL_REF
23674 || !SYMBOL_REF_LOCAL_P (x)))))
23681 if (mode == VOIDmode)
23684 switch (standard_80387_constant_p (x))
23689 default: /* Other constants */
23694 /* Start with (MEM (SYMBOL_REF)), since that's where
23695 it'll probably end up. Add a penalty for size. */
23696 *total = (COSTS_N_INSNS (1)
23697 + (flag_pic != 0 && !TARGET_64BIT)
23698 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
23704 /* The zero extensions is often completely free on x86_64, so make
23705 it as cheap as possible. */
23706 if (TARGET_64BIT && mode == DImode
23707 && GET_MODE (XEXP (x, 0)) == SImode)
23709 else if (TARGET_ZERO_EXTEND_WITH_AND)
23710 *total = ix86_cost->add;
23712 *total = ix86_cost->movzx;
23716 *total = ix86_cost->movsx;
23720 if (CONST_INT_P (XEXP (x, 1))
23721 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
23723 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
23726 *total = ix86_cost->add;
23729 if ((value == 2 || value == 3)
23730 && ix86_cost->lea <= ix86_cost->shift_const)
23732 *total = ix86_cost->lea;
23742 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
23744 if (CONST_INT_P (XEXP (x, 1)))
23746 if (INTVAL (XEXP (x, 1)) > 32)
23747 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
23749 *total = ix86_cost->shift_const * 2;
23753 if (GET_CODE (XEXP (x, 1)) == AND)
23754 *total = ix86_cost->shift_var * 2;
23756 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
23761 if (CONST_INT_P (XEXP (x, 1)))
23762 *total = ix86_cost->shift_const;
23764 *total = ix86_cost->shift_var;
23769 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23771 /* ??? SSE scalar cost should be used here. */
23772 *total = ix86_cost->fmul;
23775 else if (X87_FLOAT_MODE_P (mode))
23777 *total = ix86_cost->fmul;
23780 else if (FLOAT_MODE_P (mode))
23782 /* ??? SSE vector cost should be used here. */
23783 *total = ix86_cost->fmul;
23788 rtx op0 = XEXP (x, 0);
23789 rtx op1 = XEXP (x, 1);
23791 if (CONST_INT_P (XEXP (x, 1)))
23793 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
23794 for (nbits = 0; value != 0; value &= value - 1)
23798 /* This is arbitrary. */
23801 /* Compute costs correctly for widening multiplication. */
23802 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
23803 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
23804 == GET_MODE_SIZE (mode))
23806 int is_mulwiden = 0;
23807 enum machine_mode inner_mode = GET_MODE (op0);
23809 if (GET_CODE (op0) == GET_CODE (op1))
23810 is_mulwiden = 1, op1 = XEXP (op1, 0);
23811 else if (CONST_INT_P (op1))
23813 if (GET_CODE (op0) == SIGN_EXTEND)
23814 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
23817 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
23821 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
23824 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
23825 + nbits * ix86_cost->mult_bit
23826 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
23835 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23836 /* ??? SSE cost should be used here. */
23837 *total = ix86_cost->fdiv;
23838 else if (X87_FLOAT_MODE_P (mode))
23839 *total = ix86_cost->fdiv;
23840 else if (FLOAT_MODE_P (mode))
23841 /* ??? SSE vector cost should be used here. */
23842 *total = ix86_cost->fdiv;
23844 *total = ix86_cost->divide[MODE_INDEX (mode)];
23848 if (GET_MODE_CLASS (mode) == MODE_INT
23849 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
23851 if (GET_CODE (XEXP (x, 0)) == PLUS
23852 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
23853 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
23854 && CONSTANT_P (XEXP (x, 1)))
23856 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
23857 if (val == 2 || val == 4 || val == 8)
23859 *total = ix86_cost->lea;
23860 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
23861 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
23863 *total += rtx_cost (XEXP (x, 1), outer_code);
23867 else if (GET_CODE (XEXP (x, 0)) == MULT
23868 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
23870 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
23871 if (val == 2 || val == 4 || val == 8)
23873 *total = ix86_cost->lea;
23874 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
23875 *total += rtx_cost (XEXP (x, 1), outer_code);
23879 else if (GET_CODE (XEXP (x, 0)) == PLUS)
23881 *total = ix86_cost->lea;
23882 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
23883 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
23884 *total += rtx_cost (XEXP (x, 1), outer_code);
23891 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23893 /* ??? SSE cost should be used here. */
23894 *total = ix86_cost->fadd;
23897 else if (X87_FLOAT_MODE_P (mode))
23899 *total = ix86_cost->fadd;
23902 else if (FLOAT_MODE_P (mode))
23904 /* ??? SSE vector cost should be used here. */
23905 *total = ix86_cost->fadd;
23913 if (!TARGET_64BIT && mode == DImode)
23915 *total = (ix86_cost->add * 2
23916 + (rtx_cost (XEXP (x, 0), outer_code)
23917 << (GET_MODE (XEXP (x, 0)) != DImode))
23918 + (rtx_cost (XEXP (x, 1), outer_code)
23919 << (GET_MODE (XEXP (x, 1)) != DImode)));
23925 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23927 /* ??? SSE cost should be used here. */
23928 *total = ix86_cost->fchs;
23931 else if (X87_FLOAT_MODE_P (mode))
23933 *total = ix86_cost->fchs;
23936 else if (FLOAT_MODE_P (mode))
23938 /* ??? SSE vector cost should be used here. */
23939 *total = ix86_cost->fchs;
23945 if (!TARGET_64BIT && mode == DImode)
23946 *total = ix86_cost->add * 2;
23948 *total = ix86_cost->add;
23952 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
23953 && XEXP (XEXP (x, 0), 1) == const1_rtx
23954 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
23955 && XEXP (x, 1) == const0_rtx)
23957 /* This kind of construct is implemented using test[bwl].
23958 Treat it as if we had an AND. */
23959 *total = (ix86_cost->add
23960 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
23961 + rtx_cost (const1_rtx, outer_code));
23967 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
23972 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23973 /* ??? SSE cost should be used here. */
23974 *total = ix86_cost->fabs;
23975 else if (X87_FLOAT_MODE_P (mode))
23976 *total = ix86_cost->fabs;
23977 else if (FLOAT_MODE_P (mode))
23978 /* ??? SSE vector cost should be used here. */
23979 *total = ix86_cost->fabs;
23983 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23984 /* ??? SSE cost should be used here. */
23985 *total = ix86_cost->fsqrt;
23986 else if (X87_FLOAT_MODE_P (mode))
23987 *total = ix86_cost->fsqrt;
23988 else if (FLOAT_MODE_P (mode))
23989 /* ??? SSE vector cost should be used here. */
23990 *total = ix86_cost->fsqrt;
23994 if (XINT (x, 1) == UNSPEC_TP)
24005 static int current_machopic_label_num;
24007 /* Given a symbol name and its associated stub, write out the
24008 definition of the stub. */
24011 machopic_output_stub (FILE *file, const char *symb, const char *stub)
24013 unsigned int length;
24014 char *binder_name, *symbol_name, lazy_ptr_name[32];
24015 int label = ++current_machopic_label_num;
24017 /* For 64-bit we shouldn't get here. */
24018 gcc_assert (!TARGET_64BIT);
24020 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
24021 symb = (*targetm.strip_name_encoding) (symb);
24023 length = strlen (stub);
24024 binder_name = XALLOCAVEC (char, length + 32);
24025 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
24027 length = strlen (symb);
24028 symbol_name = XALLOCAVEC (char, length + 32);
24029 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
24031 sprintf (lazy_ptr_name, "L%d$lz", label);
24034 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
24036 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
24038 fprintf (file, "%s:\n", stub);
24039 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24043 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
24044 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
24045 fprintf (file, "\tjmp\t*%%edx\n");
24048 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
24050 fprintf (file, "%s:\n", binder_name);
24054 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
24055 fprintf (file, "\tpushl\t%%eax\n");
24058 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
24060 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
24062 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
24063 fprintf (file, "%s:\n", lazy_ptr_name);
24064 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
24065 fprintf (file, "\t.long %s\n", binder_name);
24069 darwin_x86_file_end (void)
24071 darwin_file_end ();
24074 #endif /* TARGET_MACHO */
24076 /* Order the registers for register allocator. */
24079 x86_order_regs_for_local_alloc (void)
24084 /* First allocate the local general purpose registers. */
24085 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24086 if (GENERAL_REGNO_P (i) && call_used_regs[i])
24087 reg_alloc_order [pos++] = i;
24089 /* Global general purpose registers. */
24090 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
24091 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
24092 reg_alloc_order [pos++] = i;
24094 /* x87 registers come first in case we are doing FP math
24096 if (!TARGET_SSE_MATH)
24097 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24098 reg_alloc_order [pos++] = i;
24100 /* SSE registers. */
24101 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
24102 reg_alloc_order [pos++] = i;
24103 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
24104 reg_alloc_order [pos++] = i;
24106 /* x87 registers. */
24107 if (TARGET_SSE_MATH)
24108 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
24109 reg_alloc_order [pos++] = i;
24111 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
24112 reg_alloc_order [pos++] = i;
24114 /* Initialize the rest of array as we do not allocate some registers
24116 while (pos < FIRST_PSEUDO_REGISTER)
24117 reg_alloc_order [pos++] = 0;
24120 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
24121 struct attribute_spec.handler. */
24123 ix86_handle_abi_attribute (tree *node, tree name,
24124 tree args ATTRIBUTE_UNUSED,
24125 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24127 if (TREE_CODE (*node) != FUNCTION_TYPE
24128 && TREE_CODE (*node) != METHOD_TYPE
24129 && TREE_CODE (*node) != FIELD_DECL
24130 && TREE_CODE (*node) != TYPE_DECL)
24132 warning (OPT_Wattributes, "%qs attribute only applies to functions",
24133 IDENTIFIER_POINTER (name));
24134 *no_add_attrs = true;
24139 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
24140 IDENTIFIER_POINTER (name));
24141 *no_add_attrs = true;
24145 /* Can combine regparm with all attributes but fastcall. */
24146 if (is_attribute_p ("ms_abi", name))
24148 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
24150 error ("ms_abi and sysv_abi attributes are not compatible");
24155 else if (is_attribute_p ("sysv_abi", name))
24157 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
24159 error ("ms_abi and sysv_abi attributes are not compatible");
24168 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
24169 struct attribute_spec.handler. */
24171 ix86_handle_struct_attribute (tree *node, tree name,
24172 tree args ATTRIBUTE_UNUSED,
24173 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
24176 if (DECL_P (*node))
24178 if (TREE_CODE (*node) == TYPE_DECL)
24179 type = &TREE_TYPE (*node);
24184 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
24185 || TREE_CODE (*type) == UNION_TYPE)))
24187 warning (OPT_Wattributes, "%qs attribute ignored",
24188 IDENTIFIER_POINTER (name));
24189 *no_add_attrs = true;
24192 else if ((is_attribute_p ("ms_struct", name)
24193 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
24194 || ((is_attribute_p ("gcc_struct", name)
24195 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
24197 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
24198 IDENTIFIER_POINTER (name));
24199 *no_add_attrs = true;
24206 ix86_ms_bitfield_layout_p (const_tree record_type)
24208 return (TARGET_MS_BITFIELD_LAYOUT &&
24209 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
24210 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
24213 /* Returns an expression indicating where the this parameter is
24214 located on entry to the FUNCTION. */
24217 x86_this_parameter (tree function)
24219 tree type = TREE_TYPE (function);
24220 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
24225 const int *parm_regs;
24227 if (ix86_function_type_abi (type) == MS_ABI)
24228 parm_regs = x86_64_ms_abi_int_parameter_registers;
24230 parm_regs = x86_64_int_parameter_registers;
24231 return gen_rtx_REG (DImode, parm_regs[aggr]);
24234 nregs = ix86_function_regparm (type, function);
24236 if (nregs > 0 && !stdarg_p (type))
24240 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
24241 regno = aggr ? DX_REG : CX_REG;
24249 return gen_rtx_MEM (SImode,
24250 plus_constant (stack_pointer_rtx, 4));
24253 return gen_rtx_REG (SImode, regno);
24256 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
24259 /* Determine whether x86_output_mi_thunk can succeed. */
24262 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
24263 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
24264 HOST_WIDE_INT vcall_offset, const_tree function)
24266 /* 64-bit can handle anything. */
24270 /* For 32-bit, everything's fine if we have one free register. */
24271 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
24274 /* Need a free register for vcall_offset. */
24278 /* Need a free register for GOT references. */
24279 if (flag_pic && !(*targetm.binds_local_p) (function))
24282 /* Otherwise ok. */
24286 /* Output the assembler code for a thunk function. THUNK_DECL is the
24287 declaration for the thunk function itself, FUNCTION is the decl for
24288 the target function. DELTA is an immediate constant offset to be
24289 added to THIS. If VCALL_OFFSET is nonzero, the word at
24290 *(*this + vcall_offset) should be added to THIS. */
24293 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
24294 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
24295 HOST_WIDE_INT vcall_offset, tree function)
24298 rtx this_param = x86_this_parameter (function);
24301 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
24302 pull it in now and let DELTA benefit. */
24303 if (REG_P (this_param))
24304 this_reg = this_param;
24305 else if (vcall_offset)
24307 /* Put the this parameter into %eax. */
24308 xops[0] = this_param;
24309 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
24310 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24313 this_reg = NULL_RTX;
24315 /* Adjust the this parameter by a fixed constant. */
24318 xops[0] = GEN_INT (delta);
24319 xops[1] = this_reg ? this_reg : this_param;
24322 if (!x86_64_general_operand (xops[0], DImode))
24324 tmp = gen_rtx_REG (DImode, R10_REG);
24326 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
24328 xops[1] = this_param;
24330 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
24333 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
24336 /* Adjust the this parameter by a value stored in the vtable. */
24340 tmp = gen_rtx_REG (DImode, R10_REG);
24343 int tmp_regno = CX_REG;
24344 if (lookup_attribute ("fastcall",
24345 TYPE_ATTRIBUTES (TREE_TYPE (function))))
24346 tmp_regno = AX_REG;
24347 tmp = gen_rtx_REG (SImode, tmp_regno);
24350 xops[0] = gen_rtx_MEM (Pmode, this_reg);
24352 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24354 /* Adjust the this parameter. */
24355 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
24356 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
24358 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
24359 xops[0] = GEN_INT (vcall_offset);
24361 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
24362 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
24364 xops[1] = this_reg;
24365 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
24368 /* If necessary, drop THIS back to its stack slot. */
24369 if (this_reg && this_reg != this_param)
24371 xops[0] = this_reg;
24372 xops[1] = this_param;
24373 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
24376 xops[0] = XEXP (DECL_RTL (function), 0);
24379 if (!flag_pic || (*targetm.binds_local_p) (function))
24380 output_asm_insn ("jmp\t%P0", xops);
24381 /* All thunks should be in the same object as their target,
24382 and thus binds_local_p should be true. */
24383 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
24384 gcc_unreachable ();
24387 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
24388 tmp = gen_rtx_CONST (Pmode, tmp);
24389 tmp = gen_rtx_MEM (QImode, tmp);
24391 output_asm_insn ("jmp\t%A0", xops);
24396 if (!flag_pic || (*targetm.binds_local_p) (function))
24397 output_asm_insn ("jmp\t%P0", xops);
24402 rtx sym_ref = XEXP (DECL_RTL (function), 0);
24403 tmp = (gen_rtx_SYMBOL_REF
24405 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
24406 tmp = gen_rtx_MEM (QImode, tmp);
24408 output_asm_insn ("jmp\t%0", xops);
24411 #endif /* TARGET_MACHO */
24413 tmp = gen_rtx_REG (SImode, CX_REG);
24414 output_set_got (tmp, NULL_RTX);
24417 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
24418 output_asm_insn ("jmp\t{*}%1", xops);
24424 x86_file_start (void)
24426 default_file_start ();
24428 darwin_file_start ();
24430 if (X86_FILE_START_VERSION_DIRECTIVE)
24431 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
24432 if (X86_FILE_START_FLTUSED)
24433 fputs ("\t.global\t__fltused\n", asm_out_file);
24434 if (ix86_asm_dialect == ASM_INTEL)
24435 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
24439 x86_field_alignment (tree field, int computed)
24441 enum machine_mode mode;
24442 tree type = TREE_TYPE (field);
24444 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
24446 mode = TYPE_MODE (strip_array_types (type));
24447 if (mode == DFmode || mode == DCmode
24448 || GET_MODE_CLASS (mode) == MODE_INT
24449 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
24450 return MIN (32, computed);
24454 /* Output assembler code to FILE to increment profiler label # LABELNO
24455 for profiling a function entry. */
24457 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
24461 #ifndef NO_PROFILE_COUNTERS
24462 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
24465 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
24466 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
24468 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24472 #ifndef NO_PROFILE_COUNTERS
24473 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
24474 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
24476 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
24480 #ifndef NO_PROFILE_COUNTERS
24481 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
24482 PROFILE_COUNT_REGISTER);
24484 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
24488 /* We don't have exact information about the insn sizes, but we may assume
24489 quite safely that we are informed about all 1 byte insns and memory
24490 address sizes. This is enough to eliminate unnecessary padding in
24494 min_insn_size (rtx insn)
24498 if (!INSN_P (insn) || !active_insn_p (insn))
24501 /* Discard alignments we've emit and jump instructions. */
24502 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
24503 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
24506 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
24507 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
24510 /* Important case - calls are always 5 bytes.
24511 It is common to have many calls in the row. */
24513 && symbolic_reference_mentioned_p (PATTERN (insn))
24514 && !SIBLING_CALL_P (insn))
24516 if (get_attr_length (insn) <= 1)
24519 /* For normal instructions we may rely on the sizes of addresses
24520 and the presence of symbol to require 4 bytes of encoding.
24521 This is not the case for jumps where references are PC relative. */
24522 if (!JUMP_P (insn))
24524 l = get_attr_length_address (insn);
24525 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
24534 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
24538 ix86_avoid_jump_misspredicts (void)
24540 rtx insn, start = get_insns ();
24541 int nbytes = 0, njumps = 0;
24544 /* Look for all minimal intervals of instructions containing 4 jumps.
24545 The intervals are bounded by START and INSN. NBYTES is the total
24546 size of instructions in the interval including INSN and not including
24547 START. When the NBYTES is smaller than 16 bytes, it is possible
24548 that the end of START and INSN ends up in the same 16byte page.
24550 The smallest offset in the page INSN can start is the case where START
24551 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
24552 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
24554 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24557 nbytes += min_insn_size (insn);
24559 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
24560 INSN_UID (insn), min_insn_size (insn));
24562 && GET_CODE (PATTERN (insn)) != ADDR_VEC
24563 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
24571 start = NEXT_INSN (start);
24572 if ((JUMP_P (start)
24573 && GET_CODE (PATTERN (start)) != ADDR_VEC
24574 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
24576 njumps--, isjump = 1;
24579 nbytes -= min_insn_size (start);
24581 gcc_assert (njumps >= 0);
24583 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
24584 INSN_UID (start), INSN_UID (insn), nbytes);
24586 if (njumps == 3 && isjump && nbytes < 16)
24588 int padsize = 15 - nbytes + min_insn_size (insn);
24591 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
24592 INSN_UID (insn), padsize);
24593 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
24598 /* AMD Athlon works faster
24599 when RET is not destination of conditional jump or directly preceded
24600 by other jump instruction. We avoid the penalty by inserting NOP just
24601 before the RET instructions in such cases. */
24603 ix86_pad_returns (void)
24608 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
24610 basic_block bb = e->src;
24611 rtx ret = BB_END (bb);
24613 bool replace = false;
24615 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
24616 || !maybe_hot_bb_p (bb))
24618 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
24619 if (active_insn_p (prev) || LABEL_P (prev))
24621 if (prev && LABEL_P (prev))
24626 FOR_EACH_EDGE (e, ei, bb->preds)
24627 if (EDGE_FREQUENCY (e) && e->src->index >= 0
24628 && !(e->flags & EDGE_FALLTHRU))
24633 prev = prev_active_insn (ret);
24635 && ((JUMP_P (prev) && any_condjump_p (prev))
24638 /* Empty functions get branch mispredict even when the jump destination
24639 is not visible to us. */
24640 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
24645 emit_insn_before (gen_return_internal_long (), ret);
24651 /* Implement machine specific optimizations. We implement padding of returns
24652 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
24656 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
24657 ix86_pad_returns ();
24658 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
24659 ix86_avoid_jump_misspredicts ();
24662 /* Return nonzero when QImode register that must be represented via REX prefix
24665 x86_extended_QIreg_mentioned_p (rtx insn)
24668 extract_insn_cached (insn);
24669 for (i = 0; i < recog_data.n_operands; i++)
24670 if (REG_P (recog_data.operand[i])
24671 && REGNO (recog_data.operand[i]) >= 4)
24676 /* Return nonzero when P points to register encoded via REX prefix.
24677 Called via for_each_rtx. */
24679 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
24681 unsigned int regno;
24684 regno = REGNO (*p);
24685 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
24688 /* Return true when INSN mentions register that must be encoded using REX
24691 x86_extended_reg_mentioned_p (rtx insn)
24693 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
24696 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
24697 optabs would emit if we didn't have TFmode patterns. */
24700 x86_emit_floatuns (rtx operands[2])
24702 rtx neglab, donelab, i0, i1, f0, in, out;
24703 enum machine_mode mode, inmode;
24705 inmode = GET_MODE (operands[1]);
24706 gcc_assert (inmode == SImode || inmode == DImode);
24709 in = force_reg (inmode, operands[1]);
24710 mode = GET_MODE (out);
24711 neglab = gen_label_rtx ();
24712 donelab = gen_label_rtx ();
24713 f0 = gen_reg_rtx (mode);
24715 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
24717 expand_float (out, in, 0);
24719 emit_jump_insn (gen_jump (donelab));
24722 emit_label (neglab);
24724 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
24726 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
24728 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
24730 expand_float (f0, i0, 0);
24732 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
24734 emit_label (donelab);
24737 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24738 with all elements equal to VAR. Return true if successful. */
24741 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
24742 rtx target, rtx val)
24744 enum machine_mode smode, wsmode, wvmode;
24759 val = force_reg (GET_MODE_INNER (mode), val);
24760 x = gen_rtx_VEC_DUPLICATE (mode, val);
24761 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24767 if (TARGET_SSE || TARGET_3DNOW_A)
24769 val = gen_lowpart (SImode, val);
24770 x = gen_rtx_TRUNCATE (HImode, val);
24771 x = gen_rtx_VEC_DUPLICATE (mode, x);
24772 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24794 /* Extend HImode to SImode using a paradoxical SUBREG. */
24795 tmp1 = gen_reg_rtx (SImode);
24796 emit_move_insn (tmp1, gen_lowpart (SImode, val));
24797 /* Insert the SImode value as low element of V4SImode vector. */
24798 tmp2 = gen_reg_rtx (V4SImode);
24799 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
24800 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
24801 CONST0_RTX (V4SImode),
24803 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
24804 /* Cast the V4SImode vector back to a V8HImode vector. */
24805 tmp1 = gen_reg_rtx (V8HImode);
24806 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
24807 /* Duplicate the low short through the whole low SImode word. */
24808 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
24809 /* Cast the V8HImode vector back to a V4SImode vector. */
24810 tmp2 = gen_reg_rtx (V4SImode);
24811 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
24812 /* Replicate the low element of the V4SImode vector. */
24813 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
24814 /* Cast the V2SImode back to V8HImode, and store in target. */
24815 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
24826 /* Extend QImode to SImode using a paradoxical SUBREG. */
24827 tmp1 = gen_reg_rtx (SImode);
24828 emit_move_insn (tmp1, gen_lowpart (SImode, val));
24829 /* Insert the SImode value as low element of V4SImode vector. */
24830 tmp2 = gen_reg_rtx (V4SImode);
24831 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
24832 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
24833 CONST0_RTX (V4SImode),
24835 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
24836 /* Cast the V4SImode vector back to a V16QImode vector. */
24837 tmp1 = gen_reg_rtx (V16QImode);
24838 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
24839 /* Duplicate the low byte through the whole low SImode word. */
24840 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
24841 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
24842 /* Cast the V16QImode vector back to a V4SImode vector. */
24843 tmp2 = gen_reg_rtx (V4SImode);
24844 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
24845 /* Replicate the low element of the V4SImode vector. */
24846 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
24847 /* Cast the V2SImode back to V16QImode, and store in target. */
24848 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
24856 /* Replicate the value once into the next wider mode and recurse. */
24857 val = convert_modes (wsmode, smode, val, true);
24858 x = expand_simple_binop (wsmode, ASHIFT, val,
24859 GEN_INT (GET_MODE_BITSIZE (smode)),
24860 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24861 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
24863 x = gen_reg_rtx (wvmode);
24864 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
24865 gcc_unreachable ();
24866 emit_move_insn (target, gen_lowpart (mode, x));
24874 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24875 whose ONE_VAR element is VAR, and other elements are zero. Return true
24879 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
24880 rtx target, rtx var, int one_var)
24882 enum machine_mode vsimode;
24885 bool use_vector_set = false;
24890 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
24895 use_vector_set = TARGET_SSE4_1;
24898 use_vector_set = TARGET_SSE2;
24901 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
24907 if (use_vector_set)
24909 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
24910 var = force_reg (GET_MODE_INNER (mode), var);
24911 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24927 var = force_reg (GET_MODE_INNER (mode), var);
24928 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
24929 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24934 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
24935 new_target = gen_reg_rtx (mode);
24937 new_target = target;
24938 var = force_reg (GET_MODE_INNER (mode), var);
24939 x = gen_rtx_VEC_DUPLICATE (mode, var);
24940 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
24941 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
24944 /* We need to shuffle the value to the correct position, so
24945 create a new pseudo to store the intermediate result. */
24947 /* With SSE2, we can use the integer shuffle insns. */
24948 if (mode != V4SFmode && TARGET_SSE2)
24950 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
24952 GEN_INT (one_var == 1 ? 0 : 1),
24953 GEN_INT (one_var == 2 ? 0 : 1),
24954 GEN_INT (one_var == 3 ? 0 : 1)));
24955 if (target != new_target)
24956 emit_move_insn (target, new_target);
24960 /* Otherwise convert the intermediate result to V4SFmode and
24961 use the SSE1 shuffle instructions. */
24962 if (mode != V4SFmode)
24964 tmp = gen_reg_rtx (V4SFmode);
24965 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
24970 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
24972 GEN_INT (one_var == 1 ? 0 : 1),
24973 GEN_INT (one_var == 2 ? 0+4 : 1+4),
24974 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
24976 if (mode != V4SFmode)
24977 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
24978 else if (tmp != target)
24979 emit_move_insn (target, tmp);
24981 else if (target != new_target)
24982 emit_move_insn (target, new_target);
24987 vsimode = V4SImode;
24993 vsimode = V2SImode;
24999 /* Zero extend the variable element to SImode and recurse. */
25000 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
25002 x = gen_reg_rtx (vsimode);
25003 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
25005 gcc_unreachable ();
25007 emit_move_insn (target, gen_lowpart (mode, x));
25015 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
25016 consisting of the values in VALS. It is known that all elements
25017 except ONE_VAR are constants. Return true if successful. */
25020 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
25021 rtx target, rtx vals, int one_var)
25023 rtx var = XVECEXP (vals, 0, one_var);
25024 enum machine_mode wmode;
25027 const_vec = copy_rtx (vals);
25028 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
25029 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
25037 /* For the two element vectors, it's just as easy to use
25038 the general case. */
25056 /* There's no way to set one QImode entry easily. Combine
25057 the variable value with its adjacent constant value, and
25058 promote to an HImode set. */
25059 x = XVECEXP (vals, 0, one_var ^ 1);
25062 var = convert_modes (HImode, QImode, var, true);
25063 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
25064 NULL_RTX, 1, OPTAB_LIB_WIDEN);
25065 x = GEN_INT (INTVAL (x) & 0xff);
25069 var = convert_modes (HImode, QImode, var, true);
25070 x = gen_int_mode (INTVAL (x) << 8, HImode);
25072 if (x != const0_rtx)
25073 var = expand_simple_binop (HImode, IOR, var, x, var,
25074 1, OPTAB_LIB_WIDEN);
25076 x = gen_reg_rtx (wmode);
25077 emit_move_insn (x, gen_lowpart (wmode, const_vec));
25078 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
25080 emit_move_insn (target, gen_lowpart (mode, x));
25087 emit_move_insn (target, const_vec);
25088 ix86_expand_vector_set (mmx_ok, target, var, one_var);
25092 /* A subroutine of ix86_expand_vector_init_general. Use vector
25093 concatenate to handle the most general case: all values variable,
25094 and none identical. */
25097 ix86_expand_vector_init_concat (enum machine_mode mode,
25098 rtx target, rtx *ops, int n)
25100 enum machine_mode cmode, hmode = VOIDmode;
25101 rtx first[4], second[2];
25129 gcc_unreachable ();
25132 if (!register_operand (ops[1], cmode))
25133 ops[1] = force_reg (cmode, ops[1]);
25134 if (!register_operand (ops[0], cmode))
25135 ops[0] = force_reg (cmode, ops[0]);
25136 emit_insn (gen_rtx_SET (VOIDmode, target,
25137 gen_rtx_VEC_CONCAT (mode, ops[0],
25151 gcc_unreachable ();
25156 /* FIXME: We process inputs backward to help RA. PR 36222. */
25159 for (; i > 0; i -= 2, j--)
25161 first[j] = gen_reg_rtx (cmode);
25162 v = gen_rtvec (2, ops[i - 1], ops[i]);
25163 ix86_expand_vector_init (false, first[j],
25164 gen_rtx_PARALLEL (cmode, v));
25170 gcc_assert (hmode != VOIDmode);
25171 for (i = j = 0; i < n; i += 2, j++)
25173 second[j] = gen_reg_rtx (hmode);
25174 ix86_expand_vector_init_concat (hmode, second [j],
25178 ix86_expand_vector_init_concat (mode, target, second, n);
25181 ix86_expand_vector_init_concat (mode, target, first, n);
25185 gcc_unreachable ();
25189 /* A subroutine of ix86_expand_vector_init_general. Use vector
25190 interleave to handle the most general case: all values variable,
25191 and none identical. */
25194 ix86_expand_vector_init_interleave (enum machine_mode mode,
25195 rtx target, rtx *ops, int n)
25197 enum machine_mode first_imode, second_imode, third_imode;
25200 rtx (*gen_load_even) (rtx, rtx, rtx);
25201 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
25202 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
25207 gen_load_even = gen_vec_setv8hi;
25208 gen_interleave_first_low = gen_vec_interleave_lowv4si;
25209 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25210 first_imode = V4SImode;
25211 second_imode = V2DImode;
25212 third_imode = VOIDmode;
25215 gen_load_even = gen_vec_setv16qi;
25216 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
25217 gen_interleave_second_low = gen_vec_interleave_lowv4si;
25218 first_imode = V8HImode;
25219 second_imode = V4SImode;
25220 third_imode = V2DImode;
25223 gcc_unreachable ();
25226 for (i = 0; i < n; i++)
25228 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
25229 op0 = gen_reg_rtx (SImode);
25230 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
25232 /* Insert the SImode value as low element of V4SImode vector. */
25233 op1 = gen_reg_rtx (V4SImode);
25234 op0 = gen_rtx_VEC_MERGE (V4SImode,
25235 gen_rtx_VEC_DUPLICATE (V4SImode,
25237 CONST0_RTX (V4SImode),
25239 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
25241 /* Cast the V4SImode vector back to a vector in orignal mode. */
25242 op0 = gen_reg_rtx (mode);
25243 emit_move_insn (op0, gen_lowpart (mode, op1));
25245 /* Load even elements into the second positon. */
25246 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
25249 /* Cast vector to FIRST_IMODE vector. */
25250 ops[i] = gen_reg_rtx (first_imode);
25251 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
25254 /* Interleave low FIRST_IMODE vectors. */
25255 for (i = j = 0; i < n; i += 2, j++)
25257 op0 = gen_reg_rtx (first_imode);
25258 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
25260 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
25261 ops[j] = gen_reg_rtx (second_imode);
25262 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
25265 /* Interleave low SECOND_IMODE vectors. */
25266 switch (second_imode)
25269 for (i = j = 0; i < n / 2; i += 2, j++)
25271 op0 = gen_reg_rtx (second_imode);
25272 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
25275 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
25277 ops[j] = gen_reg_rtx (third_imode);
25278 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
25280 second_imode = V2DImode;
25281 gen_interleave_second_low = gen_vec_interleave_lowv2di;
25285 op0 = gen_reg_rtx (second_imode);
25286 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
25289 /* Cast the SECOND_IMODE vector back to a vector on original
25291 emit_insn (gen_rtx_SET (VOIDmode, target,
25292 gen_lowpart (mode, op0)));
25296 gcc_unreachable ();
25300 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
25301 all values variable, and none identical. */
25304 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
25305 rtx target, rtx vals)
25314 if (!mmx_ok && !TARGET_SSE)
25322 n = GET_MODE_NUNITS (mode);
25323 for (i = 0; i < n; i++)
25324 ops[i] = XVECEXP (vals, 0, i);
25325 ix86_expand_vector_init_concat (mode, target, ops, n);
25329 if (!TARGET_SSE4_1)
25337 n = GET_MODE_NUNITS (mode);
25338 for (i = 0; i < n; i++)
25339 ops[i] = XVECEXP (vals, 0, i);
25340 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
25348 gcc_unreachable ();
25352 int i, j, n_elts, n_words, n_elt_per_word;
25353 enum machine_mode inner_mode;
25354 rtx words[4], shift;
25356 inner_mode = GET_MODE_INNER (mode);
25357 n_elts = GET_MODE_NUNITS (mode);
25358 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
25359 n_elt_per_word = n_elts / n_words;
25360 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
25362 for (i = 0; i < n_words; ++i)
25364 rtx word = NULL_RTX;
25366 for (j = 0; j < n_elt_per_word; ++j)
25368 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
25369 elt = convert_modes (word_mode, inner_mode, elt, true);
25375 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
25376 word, 1, OPTAB_LIB_WIDEN);
25377 word = expand_simple_binop (word_mode, IOR, word, elt,
25378 word, 1, OPTAB_LIB_WIDEN);
25386 emit_move_insn (target, gen_lowpart (mode, words[0]));
25387 else if (n_words == 2)
25389 rtx tmp = gen_reg_rtx (mode);
25390 emit_clobber (tmp);
25391 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
25392 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
25393 emit_move_insn (target, tmp);
25395 else if (n_words == 4)
25397 rtx tmp = gen_reg_rtx (V4SImode);
25398 gcc_assert (word_mode == SImode);
25399 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
25400 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
25401 emit_move_insn (target, gen_lowpart (mode, tmp));
25404 gcc_unreachable ();
25408 /* Initialize vector TARGET via VALS. Suppress the use of MMX
25409 instructions unless MMX_OK is true. */
25412 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
25414 enum machine_mode mode = GET_MODE (target);
25415 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25416 int n_elts = GET_MODE_NUNITS (mode);
25417 int n_var = 0, one_var = -1;
25418 bool all_same = true, all_const_zero = true;
25422 for (i = 0; i < n_elts; ++i)
25424 x = XVECEXP (vals, 0, i);
25425 if (!(CONST_INT_P (x)
25426 || GET_CODE (x) == CONST_DOUBLE
25427 || GET_CODE (x) == CONST_FIXED))
25428 n_var++, one_var = i;
25429 else if (x != CONST0_RTX (inner_mode))
25430 all_const_zero = false;
25431 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
25435 /* Constants are best loaded from the constant pool. */
25438 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
25442 /* If all values are identical, broadcast the value. */
25444 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
25445 XVECEXP (vals, 0, 0)))
25448 /* Values where only one field is non-constant are best loaded from
25449 the pool and overwritten via move later. */
25453 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
25454 XVECEXP (vals, 0, one_var),
25458 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
25462 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
25466 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
25468 enum machine_mode mode = GET_MODE (target);
25469 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25470 bool use_vec_merge = false;
25479 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
25480 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
25482 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
25484 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
25485 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25491 use_vec_merge = TARGET_SSE4_1;
25499 /* For the two element vectors, we implement a VEC_CONCAT with
25500 the extraction of the other element. */
25502 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
25503 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
25506 op0 = val, op1 = tmp;
25508 op0 = tmp, op1 = val;
25510 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
25511 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25516 use_vec_merge = TARGET_SSE4_1;
25523 use_vec_merge = true;
25527 /* tmp = target = A B C D */
25528 tmp = copy_to_reg (target);
25529 /* target = A A B B */
25530 emit_insn (gen_sse_unpcklps (target, target, target));
25531 /* target = X A B B */
25532 ix86_expand_vector_set (false, target, val, 0);
25533 /* target = A X C D */
25534 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25535 GEN_INT (1), GEN_INT (0),
25536 GEN_INT (2+4), GEN_INT (3+4)));
25540 /* tmp = target = A B C D */
25541 tmp = copy_to_reg (target);
25542 /* tmp = X B C D */
25543 ix86_expand_vector_set (false, tmp, val, 0);
25544 /* target = A B X D */
25545 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25546 GEN_INT (0), GEN_INT (1),
25547 GEN_INT (0+4), GEN_INT (3+4)));
25551 /* tmp = target = A B C D */
25552 tmp = copy_to_reg (target);
25553 /* tmp = X B C D */
25554 ix86_expand_vector_set (false, tmp, val, 0);
25555 /* target = A B X D */
25556 emit_insn (gen_sse_shufps_1 (target, target, tmp,
25557 GEN_INT (0), GEN_INT (1),
25558 GEN_INT (2+4), GEN_INT (0+4)));
25562 gcc_unreachable ();
25567 use_vec_merge = TARGET_SSE4_1;
25571 /* Element 0 handled by vec_merge below. */
25574 use_vec_merge = true;
25580 /* With SSE2, use integer shuffles to swap element 0 and ELT,
25581 store into element 0, then shuffle them back. */
25585 order[0] = GEN_INT (elt);
25586 order[1] = const1_rtx;
25587 order[2] = const2_rtx;
25588 order[3] = GEN_INT (3);
25589 order[elt] = const0_rtx;
25591 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25592 order[1], order[2], order[3]));
25594 ix86_expand_vector_set (false, target, val, 0);
25596 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
25597 order[1], order[2], order[3]));
25601 /* For SSE1, we have to reuse the V4SF code. */
25602 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
25603 gen_lowpart (SFmode, val), elt);
25608 use_vec_merge = TARGET_SSE2;
25611 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25615 use_vec_merge = TARGET_SSE4_1;
25625 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
25626 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
25627 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25631 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25633 emit_move_insn (mem, target);
25635 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25636 emit_move_insn (tmp, val);
25638 emit_move_insn (target, mem);
25643 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
25645 enum machine_mode mode = GET_MODE (vec);
25646 enum machine_mode inner_mode = GET_MODE_INNER (mode);
25647 bool use_vec_extr = false;
25660 use_vec_extr = true;
25664 use_vec_extr = TARGET_SSE4_1;
25676 tmp = gen_reg_rtx (mode);
25677 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
25678 GEN_INT (elt), GEN_INT (elt),
25679 GEN_INT (elt+4), GEN_INT (elt+4)));
25683 tmp = gen_reg_rtx (mode);
25684 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
25688 gcc_unreachable ();
25691 use_vec_extr = true;
25696 use_vec_extr = TARGET_SSE4_1;
25710 tmp = gen_reg_rtx (mode);
25711 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
25712 GEN_INT (elt), GEN_INT (elt),
25713 GEN_INT (elt), GEN_INT (elt)));
25717 tmp = gen_reg_rtx (mode);
25718 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
25722 gcc_unreachable ();
25725 use_vec_extr = true;
25730 /* For SSE1, we have to reuse the V4SF code. */
25731 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
25732 gen_lowpart (V4SFmode, vec), elt);
25738 use_vec_extr = TARGET_SSE2;
25741 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
25745 use_vec_extr = TARGET_SSE4_1;
25749 /* ??? Could extract the appropriate HImode element and shift. */
25756 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
25757 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
25759 /* Let the rtl optimizers know about the zero extension performed. */
25760 if (inner_mode == QImode || inner_mode == HImode)
25762 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
25763 target = gen_lowpart (SImode, target);
25766 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
25770 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
25772 emit_move_insn (mem, vec);
25774 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
25775 emit_move_insn (target, tmp);
25779 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
25780 pattern to reduce; DEST is the destination; IN is the input vector. */
25783 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
25785 rtx tmp1, tmp2, tmp3;
25787 tmp1 = gen_reg_rtx (V4SFmode);
25788 tmp2 = gen_reg_rtx (V4SFmode);
25789 tmp3 = gen_reg_rtx (V4SFmode);
25791 emit_insn (gen_sse_movhlps (tmp1, in, in));
25792 emit_insn (fn (tmp2, tmp1, in));
25794 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
25795 GEN_INT (1), GEN_INT (1),
25796 GEN_INT (1+4), GEN_INT (1+4)));
25797 emit_insn (fn (dest, tmp2, tmp3));
25800 /* Target hook for scalar_mode_supported_p. */
25802 ix86_scalar_mode_supported_p (enum machine_mode mode)
25804 if (DECIMAL_FLOAT_MODE_P (mode))
25806 else if (mode == TFmode)
25809 return default_scalar_mode_supported_p (mode);
25812 /* Implements target hook vector_mode_supported_p. */
25814 ix86_vector_mode_supported_p (enum machine_mode mode)
25816 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
25818 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
25820 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
25822 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
25827 /* Target hook for c_mode_for_suffix. */
25828 static enum machine_mode
25829 ix86_c_mode_for_suffix (char suffix)
25839 /* Worker function for TARGET_MD_ASM_CLOBBERS.
25841 We do this in the new i386 backend to maintain source compatibility
25842 with the old cc0-based compiler. */
25845 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
25846 tree inputs ATTRIBUTE_UNUSED,
25849 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
25851 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
25856 /* Implements target vector targetm.asm.encode_section_info. This
25857 is not used by netware. */
25859 static void ATTRIBUTE_UNUSED
25860 ix86_encode_section_info (tree decl, rtx rtl, int first)
25862 default_encode_section_info (decl, rtl, first);
25864 if (TREE_CODE (decl) == VAR_DECL
25865 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
25866 && ix86_in_large_data_p (decl))
25867 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
25870 /* Worker function for REVERSE_CONDITION. */
25873 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
25875 return (mode != CCFPmode && mode != CCFPUmode
25876 ? reverse_condition (code)
25877 : reverse_condition_maybe_unordered (code));
25880 /* Output code to perform an x87 FP register move, from OPERANDS[1]
25884 output_387_reg_move (rtx insn, rtx *operands)
25886 if (REG_P (operands[0]))
25888 if (REG_P (operands[1])
25889 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25891 if (REGNO (operands[0]) == FIRST_STACK_REG)
25892 return output_387_ffreep (operands, 0);
25893 return "fstp\t%y0";
25895 if (STACK_TOP_P (operands[0]))
25896 return "fld%z1\t%y1";
25899 else if (MEM_P (operands[0]))
25901 gcc_assert (REG_P (operands[1]));
25902 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25903 return "fstp%z0\t%y0";
25906 /* There is no non-popping store to memory for XFmode.
25907 So if we need one, follow the store with a load. */
25908 if (GET_MODE (operands[0]) == XFmode)
25909 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
25911 return "fst%z0\t%y0";
25918 /* Output code to perform a conditional jump to LABEL, if C2 flag in
25919 FP status register is set. */
25922 ix86_emit_fp_unordered_jump (rtx label)
25924 rtx reg = gen_reg_rtx (HImode);
25927 emit_insn (gen_x86_fnstsw_1 (reg));
25929 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
25931 emit_insn (gen_x86_sahf_1 (reg));
25933 temp = gen_rtx_REG (CCmode, FLAGS_REG);
25934 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
25938 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
25940 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25941 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
25944 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
25945 gen_rtx_LABEL_REF (VOIDmode, label),
25947 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
25949 emit_jump_insn (temp);
25950 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25953 /* Output code to perform a log1p XFmode calculation. */
25955 void ix86_emit_i387_log1p (rtx op0, rtx op1)
25957 rtx label1 = gen_label_rtx ();
25958 rtx label2 = gen_label_rtx ();
25960 rtx tmp = gen_reg_rtx (XFmode);
25961 rtx tmp2 = gen_reg_rtx (XFmode);
25963 emit_insn (gen_absxf2 (tmp, op1));
25964 emit_insn (gen_cmpxf (tmp,
25965 CONST_DOUBLE_FROM_REAL_VALUE (
25966 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
25968 emit_jump_insn (gen_bge (label1));
25970 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25971 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
25972 emit_jump (label2);
25974 emit_label (label1);
25975 emit_move_insn (tmp, CONST1_RTX (XFmode));
25976 emit_insn (gen_addxf3 (tmp, op1, tmp));
25977 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25978 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
25980 emit_label (label2);
25983 /* Output code to perform a Newton-Rhapson approximation of a single precision
25984 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
25986 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
25988 rtx x0, x1, e0, e1, two;
25990 x0 = gen_reg_rtx (mode);
25991 e0 = gen_reg_rtx (mode);
25992 e1 = gen_reg_rtx (mode);
25993 x1 = gen_reg_rtx (mode);
25995 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
25997 if (VECTOR_MODE_P (mode))
25998 two = ix86_build_const_vector (SFmode, true, two);
26000 two = force_reg (mode, two);
26002 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
26004 /* x0 = rcp(b) estimate */
26005 emit_insn (gen_rtx_SET (VOIDmode, x0,
26006 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
26009 emit_insn (gen_rtx_SET (VOIDmode, e0,
26010 gen_rtx_MULT (mode, x0, b)));
26012 emit_insn (gen_rtx_SET (VOIDmode, e1,
26013 gen_rtx_MINUS (mode, two, e0)));
26015 emit_insn (gen_rtx_SET (VOIDmode, x1,
26016 gen_rtx_MULT (mode, x0, e1)));
26018 emit_insn (gen_rtx_SET (VOIDmode, res,
26019 gen_rtx_MULT (mode, a, x1)));
26022 /* Output code to perform a Newton-Rhapson approximation of a
26023 single precision floating point [reciprocal] square root. */
26025 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
26028 rtx x0, e0, e1, e2, e3, mthree, mhalf;
26031 x0 = gen_reg_rtx (mode);
26032 e0 = gen_reg_rtx (mode);
26033 e1 = gen_reg_rtx (mode);
26034 e2 = gen_reg_rtx (mode);
26035 e3 = gen_reg_rtx (mode);
26037 real_from_integer (&r, VOIDmode, -3, -1, 0);
26038 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26040 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
26041 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
26043 if (VECTOR_MODE_P (mode))
26045 mthree = ix86_build_const_vector (SFmode, true, mthree);
26046 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
26049 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
26050 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
26052 /* x0 = rsqrt(a) estimate */
26053 emit_insn (gen_rtx_SET (VOIDmode, x0,
26054 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
26057 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
26062 zero = gen_reg_rtx (mode);
26063 mask = gen_reg_rtx (mode);
26065 zero = force_reg (mode, CONST0_RTX(mode));
26066 emit_insn (gen_rtx_SET (VOIDmode, mask,
26067 gen_rtx_NE (mode, zero, a)));
26069 emit_insn (gen_rtx_SET (VOIDmode, x0,
26070 gen_rtx_AND (mode, x0, mask)));
26074 emit_insn (gen_rtx_SET (VOIDmode, e0,
26075 gen_rtx_MULT (mode, x0, a)));
26077 emit_insn (gen_rtx_SET (VOIDmode, e1,
26078 gen_rtx_MULT (mode, e0, x0)));
26081 mthree = force_reg (mode, mthree);
26082 emit_insn (gen_rtx_SET (VOIDmode, e2,
26083 gen_rtx_PLUS (mode, e1, mthree)));
26085 mhalf = force_reg (mode, mhalf);
26087 /* e3 = -.5 * x0 */
26088 emit_insn (gen_rtx_SET (VOIDmode, e3,
26089 gen_rtx_MULT (mode, x0, mhalf)));
26091 /* e3 = -.5 * e0 */
26092 emit_insn (gen_rtx_SET (VOIDmode, e3,
26093 gen_rtx_MULT (mode, e0, mhalf)));
26094 /* ret = e2 * e3 */
26095 emit_insn (gen_rtx_SET (VOIDmode, res,
26096 gen_rtx_MULT (mode, e2, e3)));
26099 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
26101 static void ATTRIBUTE_UNUSED
26102 i386_solaris_elf_named_section (const char *name, unsigned int flags,
26105 /* With Binutils 2.15, the "@unwind" marker must be specified on
26106 every occurrence of the ".eh_frame" section, not just the first
26109 && strcmp (name, ".eh_frame") == 0)
26111 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
26112 flags & SECTION_WRITE ? "aw" : "a");
26115 default_elf_asm_named_section (name, flags, decl);
26118 /* Return the mangling of TYPE if it is an extended fundamental type. */
26120 static const char *
26121 ix86_mangle_type (const_tree type)
26123 type = TYPE_MAIN_VARIANT (type);
26125 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
26126 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
26129 switch (TYPE_MODE (type))
26132 /* __float128 is "g". */
26135 /* "long double" or __float80 is "e". */
26142 /* For 32-bit code we can save PIC register setup by using
26143 __stack_chk_fail_local hidden function instead of calling
26144 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
26145 register, so it is better to call __stack_chk_fail directly. */
26148 ix86_stack_protect_fail (void)
26150 return TARGET_64BIT
26151 ? default_external_stack_protect_fail ()
26152 : default_hidden_stack_protect_fail ();
26155 /* Select a format to encode pointers in exception handling data. CODE
26156 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
26157 true if the symbol may be affected by dynamic relocations.
26159 ??? All x86 object file formats are capable of representing this.
26160 After all, the relocation needed is the same as for the call insn.
26161 Whether or not a particular assembler allows us to enter such, I
26162 guess we'll have to see. */
26164 asm_preferred_eh_data_format (int code, int global)
26168 int type = DW_EH_PE_sdata8;
26170 || ix86_cmodel == CM_SMALL_PIC
26171 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
26172 type = DW_EH_PE_sdata4;
26173 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
26175 if (ix86_cmodel == CM_SMALL
26176 || (ix86_cmodel == CM_MEDIUM && code))
26177 return DW_EH_PE_udata4;
26178 return DW_EH_PE_absptr;
26181 /* Expand copysign from SIGN to the positive value ABS_VALUE
26182 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
26185 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
26187 enum machine_mode mode = GET_MODE (sign);
26188 rtx sgn = gen_reg_rtx (mode);
26189 if (mask == NULL_RTX)
26191 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
26192 if (!VECTOR_MODE_P (mode))
26194 /* We need to generate a scalar mode mask in this case. */
26195 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26196 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26197 mask = gen_reg_rtx (mode);
26198 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26202 mask = gen_rtx_NOT (mode, mask);
26203 emit_insn (gen_rtx_SET (VOIDmode, sgn,
26204 gen_rtx_AND (mode, mask, sign)));
26205 emit_insn (gen_rtx_SET (VOIDmode, result,
26206 gen_rtx_IOR (mode, abs_value, sgn)));
26209 /* Expand fabs (OP0) and return a new rtx that holds the result. The
26210 mask for masking out the sign-bit is stored in *SMASK, if that is
26213 ix86_expand_sse_fabs (rtx op0, rtx *smask)
26215 enum machine_mode mode = GET_MODE (op0);
26218 xa = gen_reg_rtx (mode);
26219 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
26220 if (!VECTOR_MODE_P (mode))
26222 /* We need to generate a scalar mode mask in this case. */
26223 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
26224 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
26225 mask = gen_reg_rtx (mode);
26226 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
26228 emit_insn (gen_rtx_SET (VOIDmode, xa,
26229 gen_rtx_AND (mode, op0, mask)));
26237 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
26238 swapping the operands if SWAP_OPERANDS is true. The expanded
26239 code is a forward jump to a newly created label in case the
26240 comparison is true. The generated label rtx is returned. */
26242 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
26243 bool swap_operands)
26254 label = gen_label_rtx ();
26255 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
26256 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26257 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
26258 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
26259 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
26260 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
26261 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
26262 JUMP_LABEL (tmp) = label;
26267 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
26268 using comparison code CODE. Operands are swapped for the comparison if
26269 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
26271 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
26272 bool swap_operands)
26274 enum machine_mode mode = GET_MODE (op0);
26275 rtx mask = gen_reg_rtx (mode);
26284 if (mode == DFmode)
26285 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
26286 gen_rtx_fmt_ee (code, mode, op0, op1)));
26288 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
26289 gen_rtx_fmt_ee (code, mode, op0, op1)));
26294 /* Generate and return a rtx of mode MODE for 2**n where n is the number
26295 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
26297 ix86_gen_TWO52 (enum machine_mode mode)
26299 REAL_VALUE_TYPE TWO52r;
26302 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
26303 TWO52 = const_double_from_real_value (TWO52r, mode);
26304 TWO52 = force_reg (mode, TWO52);
26309 /* Expand SSE sequence for computing lround from OP1 storing
26312 ix86_expand_lround (rtx op0, rtx op1)
26314 /* C code for the stuff we're doing below:
26315 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
26318 enum machine_mode mode = GET_MODE (op1);
26319 const struct real_format *fmt;
26320 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26323 /* load nextafter (0.5, 0.0) */
26324 fmt = REAL_MODE_FORMAT (mode);
26325 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26326 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26328 /* adj = copysign (0.5, op1) */
26329 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
26330 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
26332 /* adj = op1 + adj */
26333 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
26335 /* op0 = (imode)adj */
26336 expand_fix (op0, adj, 0);
26339 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
26342 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
26344 /* C code for the stuff we're doing below (for do_floor):
26346 xi -= (double)xi > op1 ? 1 : 0;
26349 enum machine_mode fmode = GET_MODE (op1);
26350 enum machine_mode imode = GET_MODE (op0);
26351 rtx ireg, freg, label, tmp;
26353 /* reg = (long)op1 */
26354 ireg = gen_reg_rtx (imode);
26355 expand_fix (ireg, op1, 0);
26357 /* freg = (double)reg */
26358 freg = gen_reg_rtx (fmode);
26359 expand_float (freg, ireg, 0);
26361 /* ireg = (freg > op1) ? ireg - 1 : ireg */
26362 label = ix86_expand_sse_compare_and_jump (UNLE,
26363 freg, op1, !do_floor);
26364 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
26365 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
26366 emit_move_insn (ireg, tmp);
26368 emit_label (label);
26369 LABEL_NUSES (label) = 1;
26371 emit_move_insn (op0, ireg);
26374 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
26375 result in OPERAND0. */
26377 ix86_expand_rint (rtx operand0, rtx operand1)
26379 /* C code for the stuff we're doing below:
26380 xa = fabs (operand1);
26381 if (!isless (xa, 2**52))
26383 xa = xa + 2**52 - 2**52;
26384 return copysign (xa, operand1);
26386 enum machine_mode mode = GET_MODE (operand0);
26387 rtx res, xa, label, TWO52, mask;
26389 res = gen_reg_rtx (mode);
26390 emit_move_insn (res, operand1);
26392 /* xa = abs (operand1) */
26393 xa = ix86_expand_sse_fabs (res, &mask);
26395 /* if (!isless (xa, TWO52)) goto label; */
26396 TWO52 = ix86_gen_TWO52 (mode);
26397 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26399 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26400 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26402 ix86_sse_copysign_to_positive (res, xa, res, mask);
26404 emit_label (label);
26405 LABEL_NUSES (label) = 1;
26407 emit_move_insn (operand0, res);
26410 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26413 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
26415 /* C code for the stuff we expand below.
26416 double xa = fabs (x), x2;
26417 if (!isless (xa, TWO52))
26419 xa = xa + TWO52 - TWO52;
26420 x2 = copysign (xa, x);
26429 enum machine_mode mode = GET_MODE (operand0);
26430 rtx xa, TWO52, tmp, label, one, res, mask;
26432 TWO52 = ix86_gen_TWO52 (mode);
26434 /* Temporary for holding the result, initialized to the input
26435 operand to ease control flow. */
26436 res = gen_reg_rtx (mode);
26437 emit_move_insn (res, operand1);
26439 /* xa = abs (operand1) */
26440 xa = ix86_expand_sse_fabs (res, &mask);
26442 /* if (!isless (xa, TWO52)) goto label; */
26443 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26445 /* xa = xa + TWO52 - TWO52; */
26446 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26447 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
26449 /* xa = copysign (xa, operand1) */
26450 ix86_sse_copysign_to_positive (xa, xa, res, mask);
26452 /* generate 1.0 or -1.0 */
26453 one = force_reg (mode,
26454 const_double_from_real_value (do_floor
26455 ? dconst1 : dconstm1, mode));
26457 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
26458 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26459 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26460 gen_rtx_AND (mode, one, tmp)));
26461 /* We always need to subtract here to preserve signed zero. */
26462 tmp = expand_simple_binop (mode, MINUS,
26463 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26464 emit_move_insn (res, tmp);
26466 emit_label (label);
26467 LABEL_NUSES (label) = 1;
26469 emit_move_insn (operand0, res);
26472 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
26475 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
26477 /* C code for the stuff we expand below.
26478 double xa = fabs (x), x2;
26479 if (!isless (xa, TWO52))
26481 x2 = (double)(long)x;
26488 if (HONOR_SIGNED_ZEROS (mode))
26489 return copysign (x2, x);
26492 enum machine_mode mode = GET_MODE (operand0);
26493 rtx xa, xi, TWO52, tmp, label, one, res, mask;
26495 TWO52 = ix86_gen_TWO52 (mode);
26497 /* Temporary for holding the result, initialized to the input
26498 operand to ease control flow. */
26499 res = gen_reg_rtx (mode);
26500 emit_move_insn (res, operand1);
26502 /* xa = abs (operand1) */
26503 xa = ix86_expand_sse_fabs (res, &mask);
26505 /* if (!isless (xa, TWO52)) goto label; */
26506 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26508 /* xa = (double)(long)x */
26509 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26510 expand_fix (xi, res, 0);
26511 expand_float (xa, xi, 0);
26514 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26516 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
26517 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
26518 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26519 gen_rtx_AND (mode, one, tmp)));
26520 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
26521 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26522 emit_move_insn (res, tmp);
26524 if (HONOR_SIGNED_ZEROS (mode))
26525 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26527 emit_label (label);
26528 LABEL_NUSES (label) = 1;
26530 emit_move_insn (operand0, res);
26533 /* Expand SSE sequence for computing round from OPERAND1 storing
26534 into OPERAND0. Sequence that works without relying on DImode truncation
26535 via cvttsd2siq that is only available on 64bit targets. */
26537 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
26539 /* C code for the stuff we expand below.
26540 double xa = fabs (x), xa2, x2;
26541 if (!isless (xa, TWO52))
26543 Using the absolute value and copying back sign makes
26544 -0.0 -> -0.0 correct.
26545 xa2 = xa + TWO52 - TWO52;
26550 else if (dxa > 0.5)
26552 x2 = copysign (xa2, x);
26555 enum machine_mode mode = GET_MODE (operand0);
26556 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
26558 TWO52 = ix86_gen_TWO52 (mode);
26560 /* Temporary for holding the result, initialized to the input
26561 operand to ease control flow. */
26562 res = gen_reg_rtx (mode);
26563 emit_move_insn (res, operand1);
26565 /* xa = abs (operand1) */
26566 xa = ix86_expand_sse_fabs (res, &mask);
26568 /* if (!isless (xa, TWO52)) goto label; */
26569 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26571 /* xa2 = xa + TWO52 - TWO52; */
26572 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26573 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
26575 /* dxa = xa2 - xa; */
26576 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
26578 /* generate 0.5, 1.0 and -0.5 */
26579 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
26580 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
26581 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
26585 tmp = gen_reg_rtx (mode);
26586 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
26587 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
26588 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26589 gen_rtx_AND (mode, one, tmp)));
26590 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26591 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
26592 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
26593 emit_insn (gen_rtx_SET (VOIDmode, tmp,
26594 gen_rtx_AND (mode, one, tmp)));
26595 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
26597 /* res = copysign (xa2, operand1) */
26598 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
26600 emit_label (label);
26601 LABEL_NUSES (label) = 1;
26603 emit_move_insn (operand0, res);
26606 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26609 ix86_expand_trunc (rtx operand0, rtx operand1)
26611 /* C code for SSE variant we expand below.
26612 double xa = fabs (x), x2;
26613 if (!isless (xa, TWO52))
26615 x2 = (double)(long)x;
26616 if (HONOR_SIGNED_ZEROS (mode))
26617 return copysign (x2, x);
26620 enum machine_mode mode = GET_MODE (operand0);
26621 rtx xa, xi, TWO52, label, res, mask;
26623 TWO52 = ix86_gen_TWO52 (mode);
26625 /* Temporary for holding the result, initialized to the input
26626 operand to ease control flow. */
26627 res = gen_reg_rtx (mode);
26628 emit_move_insn (res, operand1);
26630 /* xa = abs (operand1) */
26631 xa = ix86_expand_sse_fabs (res, &mask);
26633 /* if (!isless (xa, TWO52)) goto label; */
26634 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26636 /* x = (double)(long)x */
26637 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26638 expand_fix (xi, res, 0);
26639 expand_float (res, xi, 0);
26641 if (HONOR_SIGNED_ZEROS (mode))
26642 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
26644 emit_label (label);
26645 LABEL_NUSES (label) = 1;
26647 emit_move_insn (operand0, res);
26650 /* Expand SSE sequence for computing trunc from OPERAND1 storing
26653 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
26655 enum machine_mode mode = GET_MODE (operand0);
26656 rtx xa, mask, TWO52, label, one, res, smask, tmp;
26658 /* C code for SSE variant we expand below.
26659 double xa = fabs (x), x2;
26660 if (!isless (xa, TWO52))
26662 xa2 = xa + TWO52 - TWO52;
26666 x2 = copysign (xa2, x);
26670 TWO52 = ix86_gen_TWO52 (mode);
26672 /* Temporary for holding the result, initialized to the input
26673 operand to ease control flow. */
26674 res = gen_reg_rtx (mode);
26675 emit_move_insn (res, operand1);
26677 /* xa = abs (operand1) */
26678 xa = ix86_expand_sse_fabs (res, &smask);
26680 /* if (!isless (xa, TWO52)) goto label; */
26681 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26683 /* res = xa + TWO52 - TWO52; */
26684 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
26685 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
26686 emit_move_insn (res, tmp);
26689 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
26691 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
26692 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
26693 emit_insn (gen_rtx_SET (VOIDmode, mask,
26694 gen_rtx_AND (mode, mask, one)));
26695 tmp = expand_simple_binop (mode, MINUS,
26696 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
26697 emit_move_insn (res, tmp);
26699 /* res = copysign (res, operand1) */
26700 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
26702 emit_label (label);
26703 LABEL_NUSES (label) = 1;
26705 emit_move_insn (operand0, res);
26708 /* Expand SSE sequence for computing round from OPERAND1 storing
26711 ix86_expand_round (rtx operand0, rtx operand1)
26713 /* C code for the stuff we're doing below:
26714 double xa = fabs (x);
26715 if (!isless (xa, TWO52))
26717 xa = (double)(long)(xa + nextafter (0.5, 0.0));
26718 return copysign (xa, x);
26720 enum machine_mode mode = GET_MODE (operand0);
26721 rtx res, TWO52, xa, label, xi, half, mask;
26722 const struct real_format *fmt;
26723 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
26725 /* Temporary for holding the result, initialized to the input
26726 operand to ease control flow. */
26727 res = gen_reg_rtx (mode);
26728 emit_move_insn (res, operand1);
26730 TWO52 = ix86_gen_TWO52 (mode);
26731 xa = ix86_expand_sse_fabs (res, &mask);
26732 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
26734 /* load nextafter (0.5, 0.0) */
26735 fmt = REAL_MODE_FORMAT (mode);
26736 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
26737 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
26739 /* xa = xa + 0.5 */
26740 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
26741 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
26743 /* xa = (double)(int64_t)xa */
26744 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
26745 expand_fix (xi, xa, 0);
26746 expand_float (xa, xi, 0);
26748 /* res = copysign (xa, operand1) */
26749 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
26751 emit_label (label);
26752 LABEL_NUSES (label) = 1;
26754 emit_move_insn (operand0, res);
26758 /* Validate whether a SSE5 instruction is valid or not.
26759 OPERANDS is the array of operands.
26760 NUM is the number of operands.
26761 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
26762 NUM_MEMORY is the maximum number of memory operands to accept.
26763 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
26766 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
26767 bool uses_oc0, int num_memory, bool commutative)
26773 /* Count the number of memory arguments */
26776 for (i = 0; i < num; i++)
26778 enum machine_mode mode = GET_MODE (operands[i]);
26779 if (register_operand (operands[i], mode))
26782 else if (memory_operand (operands[i], mode))
26784 mem_mask |= (1 << i);
26790 rtx pattern = PATTERN (insn);
26792 /* allow 0 for pcmov */
26793 if (GET_CODE (pattern) != SET
26794 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
26796 || operands[i] != CONST0_RTX (mode))
26801 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
26802 a memory operation. */
26803 if (num_memory < 0)
26805 num_memory = -num_memory;
26806 if ((mem_mask & (1 << (num-1))) != 0)
26808 mem_mask &= ~(1 << (num-1));
26813 /* If there were no memory operations, allow the insn */
26817 /* Do not allow the destination register to be a memory operand. */
26818 else if (mem_mask & (1 << 0))
26821 /* If there are too many memory operations, disallow the instruction. While
26822 the hardware only allows 1 memory reference, before register allocation
26823 for some insns, we allow two memory operations sometimes in order to allow
26824 code like the following to be optimized:
26826 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
26828 or similar cases that are vectorized into using the fmaddss
26830 else if (mem_count > num_memory)
26833 /* Don't allow more than one memory operation if not optimizing. */
26834 else if (mem_count > 1 && !optimize)
26837 else if (num == 4 && mem_count == 1)
26839 /* formats (destination is the first argument), example fmaddss:
26840 xmm1, xmm1, xmm2, xmm3/mem
26841 xmm1, xmm1, xmm2/mem, xmm3
26842 xmm1, xmm2, xmm3/mem, xmm1
26843 xmm1, xmm2/mem, xmm3, xmm1 */
26845 return ((mem_mask == (1 << 1))
26846 || (mem_mask == (1 << 2))
26847 || (mem_mask == (1 << 3)));
26849 /* format, example pmacsdd:
26850 xmm1, xmm2, xmm3/mem, xmm1 */
26852 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
26854 return (mem_mask == (1 << 2));
26857 else if (num == 4 && num_memory == 2)
26859 /* If there are two memory operations, we can load one of the memory ops
26860 into the destination register. This is for optimizing the
26861 multiply/add ops, which the combiner has optimized both the multiply
26862 and the add insns to have a memory operation. We have to be careful
26863 that the destination doesn't overlap with the inputs. */
26864 rtx op0 = operands[0];
26866 if (reg_mentioned_p (op0, operands[1])
26867 || reg_mentioned_p (op0, operands[2])
26868 || reg_mentioned_p (op0, operands[3]))
26871 /* formats (destination is the first argument), example fmaddss:
26872 xmm1, xmm1, xmm2, xmm3/mem
26873 xmm1, xmm1, xmm2/mem, xmm3
26874 xmm1, xmm2, xmm3/mem, xmm1
26875 xmm1, xmm2/mem, xmm3, xmm1
26877 For the oc0 case, we will load either operands[1] or operands[3] into
26878 operands[0], so any combination of 2 memory operands is ok. */
26882 /* format, example pmacsdd:
26883 xmm1, xmm2, xmm3/mem, xmm1
26885 For the integer multiply/add instructions be more restrictive and
26886 require operands[2] and operands[3] to be the memory operands. */
26888 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
26890 return (mem_mask == ((1 << 2) | (1 << 3)));
26893 else if (num == 3 && num_memory == 1)
26895 /* formats, example protb:
26896 xmm1, xmm2, xmm3/mem
26897 xmm1, xmm2/mem, xmm3 */
26899 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
26901 /* format, example comeq:
26902 xmm1, xmm2, xmm3/mem */
26904 return (mem_mask == (1 << 2));
26908 gcc_unreachable ();
26914 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
26915 hardware will allow by using the destination register to load one of the
26916 memory operations. Presently this is used by the multiply/add routines to
26917 allow 2 memory references. */
26920 ix86_expand_sse5_multiple_memory (rtx operands[],
26922 enum machine_mode mode)
26924 rtx op0 = operands[0];
26926 || memory_operand (op0, mode)
26927 || reg_mentioned_p (op0, operands[1])
26928 || reg_mentioned_p (op0, operands[2])
26929 || reg_mentioned_p (op0, operands[3]))
26930 gcc_unreachable ();
26932 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
26933 the destination register. */
26934 if (memory_operand (operands[1], mode))
26936 emit_move_insn (op0, operands[1]);
26939 else if (memory_operand (operands[3], mode))
26941 emit_move_insn (op0, operands[3]);
26945 gcc_unreachable ();
26951 /* Table of valid machine attributes. */
26952 static const struct attribute_spec ix86_attribute_table[] =
26954 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
26955 /* Stdcall attribute says callee is responsible for popping arguments
26956 if they are not variable. */
26957 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26958 /* Fastcall attribute says callee is responsible for popping arguments
26959 if they are not variable. */
26960 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26961 /* Cdecl attribute says the callee is a normal C declaration */
26962 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26963 /* Regparm attribute specifies how many integer arguments are to be
26964 passed in registers. */
26965 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
26966 /* Sseregparm attribute says we are using x86_64 calling conventions
26967 for FP arguments. */
26968 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26969 /* force_align_arg_pointer says this function realigns the stack at entry. */
26970 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
26971 false, true, true, ix86_handle_cconv_attribute },
26972 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26973 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
26974 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
26975 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
26977 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26978 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26979 #ifdef SUBTARGET_ATTRIBUTE_TABLE
26980 SUBTARGET_ATTRIBUTE_TABLE,
26982 /* ms_abi and sysv_abi calling convention function attributes. */
26983 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26984 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26986 { NULL, 0, 0, false, false, false, NULL }
26989 /* Implement targetm.vectorize.builtin_vectorization_cost. */
26991 x86_builtin_vectorization_cost (bool runtime_test)
26993 /* If the branch of the runtime test is taken - i.e. - the vectorized
26994 version is skipped - this incurs a misprediction cost (because the
26995 vectorized version is expected to be the fall-through). So we subtract
26996 the latency of a mispredicted branch from the costs that are incured
26997 when the vectorized version is executed.
26999 TODO: The values in individual target tables have to be tuned or new
27000 fields may be needed. For eg. on K8, the default branch path is the
27001 not-taken path. If the taken path is predicted correctly, the minimum
27002 penalty of going down the taken-path is 1 cycle. If the taken-path is
27003 not predicted correctly, then the minimum penalty is 10 cycles. */
27007 return (-(ix86_cost->cond_taken_branch_cost));
27013 /* This function returns the calling abi specific va_list type node.
27014 It returns the FNDECL specific va_list type. */
27017 ix86_fn_abi_va_list (tree fndecl)
27022 return va_list_type_node;
27023 gcc_assert (fndecl != NULL_TREE);
27024 abi = ix86_function_abi ((const_tree) fndecl);
27027 return ms_va_list_type_node;
27029 return sysv_va_list_type_node;
27032 /* Returns the canonical va_list type specified by TYPE. If there
27033 is no valid TYPE provided, it return NULL_TREE. */
27036 ix86_canonical_va_list_type (tree type)
27040 /* Resolve references and pointers to va_list type. */
27041 if (INDIRECT_REF_P (type))
27042 type = TREE_TYPE (type);
27043 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
27044 type = TREE_TYPE (type);
27048 wtype = va_list_type_node;
27049 gcc_assert (wtype != NULL_TREE);
27051 if (TREE_CODE (wtype) == ARRAY_TYPE)
27053 /* If va_list is an array type, the argument may have decayed
27054 to a pointer type, e.g. by being passed to another function.
27055 In that case, unwrap both types so that we can compare the
27056 underlying records. */
27057 if (TREE_CODE (htype) == ARRAY_TYPE
27058 || POINTER_TYPE_P (htype))
27060 wtype = TREE_TYPE (wtype);
27061 htype = TREE_TYPE (htype);
27064 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27065 return va_list_type_node;
27066 wtype = sysv_va_list_type_node;
27067 gcc_assert (wtype != NULL_TREE);
27069 if (TREE_CODE (wtype) == ARRAY_TYPE)
27071 /* If va_list is an array type, the argument may have decayed
27072 to a pointer type, e.g. by being passed to another function.
27073 In that case, unwrap both types so that we can compare the
27074 underlying records. */
27075 if (TREE_CODE (htype) == ARRAY_TYPE
27076 || POINTER_TYPE_P (htype))
27078 wtype = TREE_TYPE (wtype);
27079 htype = TREE_TYPE (htype);
27082 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27083 return sysv_va_list_type_node;
27084 wtype = ms_va_list_type_node;
27085 gcc_assert (wtype != NULL_TREE);
27087 if (TREE_CODE (wtype) == ARRAY_TYPE)
27089 /* If va_list is an array type, the argument may have decayed
27090 to a pointer type, e.g. by being passed to another function.
27091 In that case, unwrap both types so that we can compare the
27092 underlying records. */
27093 if (TREE_CODE (htype) == ARRAY_TYPE
27094 || POINTER_TYPE_P (htype))
27096 wtype = TREE_TYPE (wtype);
27097 htype = TREE_TYPE (htype);
27100 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
27101 return ms_va_list_type_node;
27104 return std_canonical_va_list_type (type);
27107 /* Iterate through the target-specific builtin types for va_list.
27108 IDX denotes the iterator, *PTREE is set to the result type of
27109 the va_list builtin, and *PNAME to its internal type.
27110 Returns zero if there is no element for this index, otherwise
27111 IDX should be increased upon the next call.
27112 Note, do not iterate a base builtin's name like __builtin_va_list.
27113 Used from c_common_nodes_and_builtins. */
27116 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
27122 *ptree = ms_va_list_type_node;
27123 *pname = "__builtin_ms_va_list";
27126 *ptree = sysv_va_list_type_node;
27127 *pname = "__builtin_sysv_va_list";
27135 /* Initialize the GCC target structure. */
27136 #undef TARGET_RETURN_IN_MEMORY
27137 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
27139 #undef TARGET_ATTRIBUTE_TABLE
27140 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
27141 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27142 # undef TARGET_MERGE_DECL_ATTRIBUTES
27143 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
27146 #undef TARGET_COMP_TYPE_ATTRIBUTES
27147 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
27149 #undef TARGET_INIT_BUILTINS
27150 #define TARGET_INIT_BUILTINS ix86_init_builtins
27151 #undef TARGET_EXPAND_BUILTIN
27152 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
27154 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
27155 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
27156 ix86_builtin_vectorized_function
27158 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
27159 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
27161 #undef TARGET_BUILTIN_RECIPROCAL
27162 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
27164 #undef TARGET_ASM_FUNCTION_EPILOGUE
27165 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
27167 #undef TARGET_ENCODE_SECTION_INFO
27168 #ifndef SUBTARGET_ENCODE_SECTION_INFO
27169 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
27171 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
27174 #undef TARGET_ASM_OPEN_PAREN
27175 #define TARGET_ASM_OPEN_PAREN ""
27176 #undef TARGET_ASM_CLOSE_PAREN
27177 #define TARGET_ASM_CLOSE_PAREN ""
27179 #undef TARGET_ASM_ALIGNED_HI_OP
27180 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
27181 #undef TARGET_ASM_ALIGNED_SI_OP
27182 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
27184 #undef TARGET_ASM_ALIGNED_DI_OP
27185 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
27188 #undef TARGET_ASM_UNALIGNED_HI_OP
27189 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
27190 #undef TARGET_ASM_UNALIGNED_SI_OP
27191 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
27192 #undef TARGET_ASM_UNALIGNED_DI_OP
27193 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
27195 #undef TARGET_SCHED_ADJUST_COST
27196 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
27197 #undef TARGET_SCHED_ISSUE_RATE
27198 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
27199 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
27200 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
27201 ia32_multipass_dfa_lookahead
27203 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
27204 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
27207 #undef TARGET_HAVE_TLS
27208 #define TARGET_HAVE_TLS true
27210 #undef TARGET_CANNOT_FORCE_CONST_MEM
27211 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
27212 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
27213 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
27215 #undef TARGET_DELEGITIMIZE_ADDRESS
27216 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
27218 #undef TARGET_MS_BITFIELD_LAYOUT_P
27219 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
27222 #undef TARGET_BINDS_LOCAL_P
27223 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
27225 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
27226 #undef TARGET_BINDS_LOCAL_P
27227 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
27230 #undef TARGET_ASM_OUTPUT_MI_THUNK
27231 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
27232 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
27233 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
27235 #undef TARGET_ASM_FILE_START
27236 #define TARGET_ASM_FILE_START x86_file_start
27238 #undef TARGET_DEFAULT_TARGET_FLAGS
27239 #define TARGET_DEFAULT_TARGET_FLAGS \
27241 | TARGET_SUBTARGET_DEFAULT \
27242 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
27244 #undef TARGET_HANDLE_OPTION
27245 #define TARGET_HANDLE_OPTION ix86_handle_option
27247 #undef TARGET_RTX_COSTS
27248 #define TARGET_RTX_COSTS ix86_rtx_costs
27249 #undef TARGET_ADDRESS_COST
27250 #define TARGET_ADDRESS_COST ix86_address_cost
27252 #undef TARGET_FIXED_CONDITION_CODE_REGS
27253 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
27254 #undef TARGET_CC_MODES_COMPATIBLE
27255 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
27257 #undef TARGET_MACHINE_DEPENDENT_REORG
27258 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
27260 #undef TARGET_BUILD_BUILTIN_VA_LIST
27261 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
27263 #undef TARGET_FN_ABI_VA_LIST
27264 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
27266 #undef TARGET_CANONICAL_VA_LIST_TYPE
27267 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
27269 #undef TARGET_EXPAND_BUILTIN_VA_START
27270 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
27272 #undef TARGET_MD_ASM_CLOBBERS
27273 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
27275 #undef TARGET_PROMOTE_PROTOTYPES
27276 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
27277 #undef TARGET_STRUCT_VALUE_RTX
27278 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
27279 #undef TARGET_SETUP_INCOMING_VARARGS
27280 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
27281 #undef TARGET_MUST_PASS_IN_STACK
27282 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
27283 #undef TARGET_PASS_BY_REFERENCE
27284 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
27285 #undef TARGET_INTERNAL_ARG_POINTER
27286 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
27287 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
27288 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
27289 #undef TARGET_STRICT_ARGUMENT_NAMING
27290 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
27292 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
27293 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
27295 #undef TARGET_SCALAR_MODE_SUPPORTED_P
27296 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
27298 #undef TARGET_VECTOR_MODE_SUPPORTED_P
27299 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
27301 #undef TARGET_C_MODE_FOR_SUFFIX
27302 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
27305 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
27306 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
27309 #ifdef SUBTARGET_INSERT_ATTRIBUTES
27310 #undef TARGET_INSERT_ATTRIBUTES
27311 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
27314 #undef TARGET_MANGLE_TYPE
27315 #define TARGET_MANGLE_TYPE ix86_mangle_type
27317 #undef TARGET_STACK_PROTECT_FAIL
27318 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
27320 #undef TARGET_FUNCTION_VALUE
27321 #define TARGET_FUNCTION_VALUE ix86_function_value
27323 #undef TARGET_SECONDARY_RELOAD
27324 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
27326 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
27327 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
27329 #undef TARGET_SET_CURRENT_FUNCTION
27330 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
27332 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
27333 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
27335 #undef TARGET_OPTION_SAVE
27336 #define TARGET_OPTION_SAVE ix86_function_specific_save
27338 #undef TARGET_OPTION_RESTORE
27339 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
27341 #undef TARGET_OPTION_PRINT
27342 #define TARGET_OPTION_PRINT ix86_function_specific_print
27344 #undef TARGET_OPTION_CAN_INLINE_P
27345 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
27347 struct gcc_target targetm = TARGET_INITIALIZER;
27349 #include "gt-i386.h"