1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of loading integer registers */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1440 /* Feature tests against the various architecture variations. */
1441 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1442 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1443 ~(m_386 | m_486 | m_PENT | m_K6),
1445 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1448 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1451 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1454 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1458 static const unsigned int x86_accumulate_outgoing_args
1459 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1461 static const unsigned int x86_arch_always_fancy_math_387
1462 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1463 | m_NOCONA | m_CORE2 | m_GENERIC;
1465 static enum stringop_alg stringop_alg = no_stringop;
1467 /* In case the average insn count for single function invocation is
1468 lower than this constant, emit fast (but longer) prologue and
1470 #define FAST_PROLOGUE_INSN_COUNT 20
1472 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1473 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1477 /* Array of the smallest class containing reg number REGNO, indexed by
1478 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1480 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1482 /* ax, dx, cx, bx */
1483 AREG, DREG, CREG, BREG,
1484 /* si, di, bp, sp */
1485 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1487 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1488 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1491 /* flags, fpsr, fpcr, frame */
1492 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1494 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1497 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1502 /* SSE REX registers */
1503 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1507 /* The "default" register map used in 32bit mode. */
1509 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1511 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1512 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1513 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1514 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1515 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1517 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1520 static int const x86_64_int_parameter_registers[6] =
1522 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1526 static int const x86_64_ms_abi_int_parameter_registers[4] =
1528 2 /*RCX*/, 1 /*RDX*/,
1529 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1532 static int const x86_64_int_return_registers[4] =
1534 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1537 /* The "default" register map used in 64bit mode. */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1621 /* Size of the register save area. */
1622 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1624 /* Define the structure for the machine field in struct function. */
1626 struct stack_local_entry GTY(())
1628 unsigned short mode;
1631 struct stack_local_entry *next;
1634 /* Structure describing stack frame layout.
1635 Stack grows downward:
1641 saved frame pointer if frame_pointer_needed
1642 <- HARD_FRAME_POINTER
1647 [va_arg registers] (
1648 > to_allocate <- FRAME_POINTER
1658 HOST_WIDE_INT frame;
1660 int outgoing_arguments_size;
1663 HOST_WIDE_INT to_allocate;
1664 /* The offsets relative to ARG_POINTER. */
1665 HOST_WIDE_INT frame_pointer_offset;
1666 HOST_WIDE_INT hard_frame_pointer_offset;
1667 HOST_WIDE_INT stack_pointer_offset;
1669 /* When save_regs_using_mov is set, emit prologue using
1670 move instead of push instructions. */
1671 bool save_regs_using_mov;
1674 /* Code model option. */
1675 enum cmodel ix86_cmodel;
1677 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1679 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1681 /* Which unit we are generating floating point math for. */
1682 enum fpmath_unit ix86_fpmath;
1684 /* Which cpu are we scheduling for. */
1685 enum processor_type ix86_tune;
1687 /* Which instruction set architecture to use. */
1688 enum processor_type ix86_arch;
1690 /* true if sse prefetch instruction is not NOOP. */
1691 int x86_prefetch_sse;
1693 /* ix86_regparm_string as a number */
1694 static int ix86_regparm;
1696 /* -mstackrealign option */
1697 extern int ix86_force_align_arg_pointer;
1698 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1700 /* Preferred alignment for stack boundary in bits. */
1701 unsigned int ix86_preferred_stack_boundary;
1703 /* Values 1-5: see jump.c */
1704 int ix86_branch_cost;
1706 /* Variables which are this size or smaller are put in the data/bss
1707 or ldata/lbss sections. */
1709 int ix86_section_threshold = 65536;
1711 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1712 char internal_label_prefix[16];
1713 int internal_label_prefix_len;
1715 /* Fence to use after loop using movnt. */
1718 /* Register class used for passing given 64bit part of the argument.
1719 These represent classes as documented by the PS ABI, with the exception
1720 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1721 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1723 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1724 whenever possible (upper half does contain padding). */
1725 enum x86_64_reg_class
1728 X86_64_INTEGER_CLASS,
1729 X86_64_INTEGERSI_CLASS,
1736 X86_64_COMPLEX_X87_CLASS,
1739 static const char * const x86_64_reg_class_name[] =
1741 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742 "sseup", "x87", "x87up", "cplx87", "no"
1745 #define MAX_CLASSES 4
1747 /* Table of constants used by fldpi, fldln2, etc.... */
1748 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1749 static bool ext_80387_constants_init = 0;
1752 static struct machine_function * ix86_init_machine_status (void);
1753 static rtx ix86_function_value (const_tree, const_tree, bool);
1754 static int ix86_function_regparm (const_tree, const_tree);
1755 static void ix86_compute_frame_layout (struct ix86_frame *);
1756 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1760 /* The svr4 ABI for the i386 says that records and unions are returned
1762 #ifndef DEFAULT_PCC_STRUCT_RETURN
1763 #define DEFAULT_PCC_STRUCT_RETURN 1
1766 /* Bit flags that specify the ISA we are compiling for. */
1767 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1769 /* A mask of ix86_isa_flags that includes bit X if X
1770 was set or cleared on the command line. */
1771 static int ix86_isa_flags_explicit;
1773 /* Define a set of ISAs which are available when a given ISA is
1774 enabled. MMX and SSE ISAs are handled separately. */
1776 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777 #define OPTION_MASK_ISA_3DNOW_SET \
1778 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1780 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781 #define OPTION_MASK_ISA_SSE2_SET \
1782 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783 #define OPTION_MASK_ISA_SSE3_SET \
1784 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785 #define OPTION_MASK_ISA_SSSE3_SET \
1786 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_1_SET \
1788 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789 #define OPTION_MASK_ISA_SSE4_2_SET \
1790 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1792 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1794 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1796 #define OPTION_MASK_ISA_SSE4A_SET \
1797 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798 #define OPTION_MASK_ISA_SSE5_SET \
1799 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1801 /* Define a set of ISAs which aren't available when a given ISA is
1802 disabled. MMX and SSE ISAs are handled separately. */
1804 #define OPTION_MASK_ISA_MMX_UNSET \
1805 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_UNSET \
1807 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1810 #define OPTION_MASK_ISA_SSE_UNSET \
1811 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1812 #define OPTION_MASK_ISA_SSE2_UNSET \
1813 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1814 #define OPTION_MASK_ISA_SSE3_UNSET \
1815 (OPTION_MASK_ISA_SSE3 \
1816 | OPTION_MASK_ISA_SSSE3_UNSET \
1817 | OPTION_MASK_ISA_SSE4A_UNSET )
1818 #define OPTION_MASK_ISA_SSSE3_UNSET \
1819 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1821 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1824 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1826 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1828 #define OPTION_MASK_ISA_SSE4A_UNSET \
1829 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1831 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1833 /* Vectorization library interface and handlers. */
1834 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1835 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1836 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1838 /* Implement TARGET_HANDLE_OPTION. */
1841 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1848 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1853 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1861 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1877 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1953 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1958 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1965 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1970 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1993 /* Sometimes certain combinations of command options do not make
1994 sense on a particular target machine. You can define a macro
1995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1996 defined, is executed once just after all the command options have
1999 Don't use this macro to turn on various extra optimizations for
2000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2003 override_options (void)
2006 int ix86_tune_defaulted = 0;
2007 int ix86_arch_specified = 0;
2008 unsigned int ix86_arch_mask, ix86_tune_mask;
2010 /* Comes from final.c -- no real reason to change it. */
2011 #define MAX_CODE_ALIGN 16
2015 const struct processor_costs *cost; /* Processor costs */
2016 const int align_loop; /* Default alignments. */
2017 const int align_loop_max_skip;
2018 const int align_jump;
2019 const int align_jump_max_skip;
2020 const int align_func;
2022 const processor_target_table[PROCESSOR_max] =
2024 {&i386_cost, 4, 3, 4, 3, 4},
2025 {&i486_cost, 16, 15, 16, 15, 16},
2026 {&pentium_cost, 16, 7, 16, 7, 16},
2027 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2028 {&geode_cost, 0, 0, 0, 0, 0},
2029 {&k6_cost, 32, 7, 32, 7, 32},
2030 {&athlon_cost, 16, 7, 16, 7, 16},
2031 {&pentium4_cost, 0, 0, 0, 0, 0},
2032 {&k8_cost, 16, 7, 16, 7, 16},
2033 {&nocona_cost, 0, 0, 0, 0, 0},
2034 {&core2_cost, 16, 10, 16, 10, 16},
2035 {&generic32_cost, 16, 7, 16, 7, 16},
2036 {&generic64_cost, 16, 10, 16, 10, 16},
2037 {&amdfam10_cost, 32, 24, 32, 7, 32}
2040 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2071 PTA_PREFETCH_SSE = 1 << 4,
2073 PTA_3DNOW_A = 1 << 6,
2077 PTA_POPCNT = 1 << 10,
2079 PTA_SSE4A = 1 << 12,
2080 PTA_NO_SAHF = 1 << 13,
2081 PTA_SSE4_1 = 1 << 14,
2082 PTA_SSE4_2 = 1 << 15,
2085 PTA_PCLMUL = 1 << 18
2090 const char *const name; /* processor name or nickname. */
2091 const enum processor_type processor;
2092 const unsigned /*enum pta_flags*/ flags;
2094 const processor_alias_table[] =
2096 {"i386", PROCESSOR_I386, 0},
2097 {"i486", PROCESSOR_I486, 0},
2098 {"i586", PROCESSOR_PENTIUM, 0},
2099 {"pentium", PROCESSOR_PENTIUM, 0},
2100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2101 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105 {"i686", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2114 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2115 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 | PTA_CX16 | PTA_NO_SAHF)},
2117 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2121 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 |PTA_PREFETCH_SSE)},
2123 {"k6", PROCESSOR_K6, PTA_MMX},
2124 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2126 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 | PTA_PREFETCH_SSE)},
2128 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 | PTA_PREFETCH_SSE)},
2130 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2132 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2134 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2136 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137 | PTA_MMX | PTA_SSE | PTA_SSE2
2139 {"k8", PROCESSOR_K8, (PTA_64BIT
2140 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141 | PTA_SSE | PTA_SSE2
2143 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2147 {"opteron", PROCESSOR_K8, (PTA_64BIT
2148 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 | PTA_SSE | PTA_SSE2
2151 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2155 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157 | PTA_SSE | PTA_SSE2
2159 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2163 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165 | PTA_SSE | PTA_SSE2
2167 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2171 | PTA_CX16 | PTA_ABM)},
2172 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2176 | PTA_CX16 | PTA_ABM)},
2177 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2178 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2181 int const pta_size = ARRAY_SIZE (processor_alias_table);
2183 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2184 SUBTARGET_OVERRIDE_OPTIONS;
2187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188 SUBSUBTARGET_OVERRIDE_OPTIONS;
2191 /* -fPIC is the default for x86_64. */
2192 if (TARGET_MACHO && TARGET_64BIT)
2195 /* Set the default values for switches whose default depends on TARGET_64BIT
2196 in case they weren't overwritten by command line options. */
2199 /* Mach-O doesn't support omitting the frame pointer for now. */
2200 if (flag_omit_frame_pointer == 2)
2201 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2202 if (flag_asynchronous_unwind_tables == 2)
2203 flag_asynchronous_unwind_tables = 1;
2204 if (flag_pcc_struct_return == 2)
2205 flag_pcc_struct_return = 0;
2209 if (flag_omit_frame_pointer == 2)
2210 flag_omit_frame_pointer = 0;
2211 if (flag_asynchronous_unwind_tables == 2)
2212 flag_asynchronous_unwind_tables = 0;
2213 if (flag_pcc_struct_return == 2)
2214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2217 /* Need to check -mtune=generic first. */
2218 if (ix86_tune_string)
2220 if (!strcmp (ix86_tune_string, "generic")
2221 || !strcmp (ix86_tune_string, "i686")
2222 /* As special support for cross compilers we read -mtune=native
2223 as -mtune=generic. With native compilers we won't see the
2224 -mtune=native, as it was changed by the driver. */
2225 || !strcmp (ix86_tune_string, "native"))
2228 ix86_tune_string = "generic64";
2230 ix86_tune_string = "generic32";
2232 else if (!strncmp (ix86_tune_string, "generic", 7))
2233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2237 if (ix86_arch_string)
2238 ix86_tune_string = ix86_arch_string;
2239 if (!ix86_tune_string)
2241 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2242 ix86_tune_defaulted = 1;
2245 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2246 need to use a sensible tune option. */
2247 if (!strcmp (ix86_tune_string, "generic")
2248 || !strcmp (ix86_tune_string, "x86-64")
2249 || !strcmp (ix86_tune_string, "i686"))
2252 ix86_tune_string = "generic64";
2254 ix86_tune_string = "generic32";
2257 if (ix86_stringop_string)
2259 if (!strcmp (ix86_stringop_string, "rep_byte"))
2260 stringop_alg = rep_prefix_1_byte;
2261 else if (!strcmp (ix86_stringop_string, "libcall"))
2262 stringop_alg = libcall;
2263 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264 stringop_alg = rep_prefix_4_byte;
2265 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266 stringop_alg = rep_prefix_8_byte;
2267 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268 stringop_alg = loop_1_byte;
2269 else if (!strcmp (ix86_stringop_string, "loop"))
2270 stringop_alg = loop;
2271 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272 stringop_alg = unrolled_loop;
2274 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2276 if (!strcmp (ix86_tune_string, "x86-64"))
2277 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2278 "-mtune=generic instead as appropriate.");
2280 if (!ix86_arch_string)
2281 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2283 ix86_arch_specified = 1;
2285 if (!strcmp (ix86_arch_string, "generic"))
2286 error ("generic CPU can be used only for -mtune= switch");
2287 if (!strncmp (ix86_arch_string, "generic", 7))
2288 error ("bad value (%s) for -march= switch", ix86_arch_string);
2290 if (ix86_cmodel_string != 0)
2292 if (!strcmp (ix86_cmodel_string, "small"))
2293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2294 else if (!strcmp (ix86_cmodel_string, "medium"))
2295 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2296 else if (!strcmp (ix86_cmodel_string, "large"))
2297 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2299 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2300 else if (!strcmp (ix86_cmodel_string, "32"))
2301 ix86_cmodel = CM_32;
2302 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303 ix86_cmodel = CM_KERNEL;
2305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2309 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2310 use of rip-relative addressing. This eliminates fixups that
2311 would otherwise be needed if this object is to be placed in a
2312 DLL, and is essentially just as efficient as direct addressing. */
2313 if (TARGET_64BIT_MS_ABI)
2314 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315 else if (TARGET_64BIT)
2316 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2318 ix86_cmodel = CM_32;
2320 if (ix86_asm_string != 0)
2323 && !strcmp (ix86_asm_string, "intel"))
2324 ix86_asm_dialect = ASM_INTEL;
2325 else if (!strcmp (ix86_asm_string, "att"))
2326 ix86_asm_dialect = ASM_ATT;
2328 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2330 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2331 error ("code model %qs not supported in the %s bit mode",
2332 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2333 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2334 sorry ("%i-bit mode not compiled in",
2335 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2340 ix86_arch = processor_alias_table[i].processor;
2341 /* Default cpu tuning to the architecture. */
2342 ix86_tune = ix86_arch;
2344 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345 error ("CPU you selected does not support x86-64 "
2348 if (processor_alias_table[i].flags & PTA_MMX
2349 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2351 if (processor_alias_table[i].flags & PTA_3DNOW
2352 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2354 if (processor_alias_table[i].flags & PTA_3DNOW_A
2355 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2357 if (processor_alias_table[i].flags & PTA_SSE
2358 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2360 if (processor_alias_table[i].flags & PTA_SSE2
2361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2363 if (processor_alias_table[i].flags & PTA_SSE3
2364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2366 if (processor_alias_table[i].flags & PTA_SSSE3
2367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2369 if (processor_alias_table[i].flags & PTA_SSE4_1
2370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2372 if (processor_alias_table[i].flags & PTA_SSE4_2
2373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2375 if (processor_alias_table[i].flags & PTA_SSE4A
2376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2378 if (processor_alias_table[i].flags & PTA_SSE5
2379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2382 if (processor_alias_table[i].flags & PTA_ABM)
2384 if (processor_alias_table[i].flags & PTA_CX16)
2385 x86_cmpxchg16b = true;
2386 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2388 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389 x86_prefetch_sse = true;
2390 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2392 if (processor_alias_table[i].flags & PTA_AES)
2394 if (processor_alias_table[i].flags & PTA_PCLMUL)
2401 error ("bad value (%s) for -march= switch", ix86_arch_string);
2403 ix86_arch_mask = 1u << ix86_arch;
2404 for (i = 0; i < X86_ARCH_LAST; ++i)
2405 ix86_arch_features[i] &= ix86_arch_mask;
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2410 ix86_tune = processor_alias_table[i].processor;
2411 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2413 if (ix86_tune_defaulted)
2415 ix86_tune_string = "x86-64";
2416 for (i = 0; i < pta_size; i++)
2417 if (! strcmp (ix86_tune_string,
2418 processor_alias_table[i].name))
2420 ix86_tune = processor_alias_table[i].processor;
2423 error ("CPU you selected does not support x86-64 "
2426 /* Intel CPUs have always interpreted SSE prefetch instructions as
2427 NOPs; so, we can enable SSE prefetch instructions even when
2428 -mtune (rather than -march) points us to a processor that has them.
2429 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430 higher processors. */
2432 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2433 x86_prefetch_sse = true;
2437 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2439 /* Enable SSE2 if AES or PCLMUL is enabled. */
2440 if ((x86_aes || x86_pclmul)
2441 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2443 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2447 ix86_tune_mask = 1u << ix86_tune;
2448 for (i = 0; i < X86_TUNE_LAST; ++i)
2449 ix86_tune_features[i] &= ix86_tune_mask;
2452 ix86_cost = &size_cost;
2454 ix86_cost = processor_target_table[ix86_tune].cost;
2456 /* Arrange to set up i386_stack_locals for all functions. */
2457 init_machine_status = ix86_init_machine_status;
2459 /* Validate -mregparm= value. */
2460 if (ix86_regparm_string)
2463 warning (0, "-mregparm is ignored in 64-bit mode");
2464 i = atoi (ix86_regparm_string);
2465 if (i < 0 || i > REGPARM_MAX)
2466 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2471 ix86_regparm = REGPARM_MAX;
2473 /* If the user has provided any of the -malign-* options,
2474 warn and use that value only if -falign-* is not set.
2475 Remove this code in GCC 3.2 or later. */
2476 if (ix86_align_loops_string)
2478 warning (0, "-malign-loops is obsolete, use -falign-loops");
2479 if (align_loops == 0)
2481 i = atoi (ix86_align_loops_string);
2482 if (i < 0 || i > MAX_CODE_ALIGN)
2483 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2485 align_loops = 1 << i;
2489 if (ix86_align_jumps_string)
2491 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2492 if (align_jumps == 0)
2494 i = atoi (ix86_align_jumps_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2498 align_jumps = 1 << i;
2502 if (ix86_align_funcs_string)
2504 warning (0, "-malign-functions is obsolete, use -falign-functions");
2505 if (align_functions == 0)
2507 i = atoi (ix86_align_funcs_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2511 align_functions = 1 << i;
2515 /* Default align_* from the processor table. */
2516 if (align_loops == 0)
2518 align_loops = processor_target_table[ix86_tune].align_loop;
2519 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2521 if (align_jumps == 0)
2523 align_jumps = processor_target_table[ix86_tune].align_jump;
2524 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2526 if (align_functions == 0)
2528 align_functions = processor_target_table[ix86_tune].align_func;
2531 /* Validate -mbranch-cost= value, or provide default. */
2532 ix86_branch_cost = ix86_cost->branch_cost;
2533 if (ix86_branch_cost_string)
2535 i = atoi (ix86_branch_cost_string);
2537 error ("-mbranch-cost=%d is not between 0 and 5", i);
2539 ix86_branch_cost = i;
2541 if (ix86_section_threshold_string)
2543 i = atoi (ix86_section_threshold_string);
2545 error ("-mlarge-data-threshold=%d is negative", i);
2547 ix86_section_threshold = i;
2550 if (ix86_tls_dialect_string)
2552 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553 ix86_tls_dialect = TLS_DIALECT_GNU;
2554 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555 ix86_tls_dialect = TLS_DIALECT_GNU2;
2556 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557 ix86_tls_dialect = TLS_DIALECT_SUN;
2559 error ("bad value (%s) for -mtls-dialect= switch",
2560 ix86_tls_dialect_string);
2563 if (ix87_precision_string)
2565 i = atoi (ix87_precision_string);
2566 if (i != 32 && i != 64 && i != 80)
2567 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2572 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2574 /* Enable by default the SSE and MMX builtins. Do allow the user to
2575 explicitly disable any of these. In particular, disabling SSE and
2576 MMX for kernel code is extremely useful. */
2577 if (!ix86_arch_specified)
2579 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2583 warning (0, "-mrtd is ignored in 64bit mode");
2587 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2589 if (!ix86_arch_specified)
2591 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2593 /* i386 ABI does not specify red zone. It still makes sense to use it
2594 when programmer takes care to stack from being destroyed. */
2595 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596 target_flags |= MASK_NO_RED_ZONE;
2599 /* Keep nonleaf frame pointers. */
2600 if (flag_omit_frame_pointer)
2601 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2603 flag_omit_frame_pointer = 1;
2605 /* If we're doing fast math, we don't care about comparison order
2606 wrt NaNs. This lets us use a shorter comparison sequence. */
2607 if (flag_finite_math_only)
2608 target_flags &= ~MASK_IEEE_FP;
2610 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611 since the insns won't need emulation. */
2612 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2613 target_flags &= ~MASK_NO_FANCY_MATH_387;
2615 /* Likewise, if the target doesn't have a 387, or we've specified
2616 software floating point, don't use 387 inline intrinsics. */
2618 target_flags |= MASK_NO_FANCY_MATH_387;
2620 /* Turn on MMX builtins for -msse. */
2623 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2624 x86_prefetch_sse = true;
2627 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2628 if (TARGET_SSE4_2 || TARGET_ABM)
2631 /* Validate -mpreferred-stack-boundary= value, or provide default.
2632 The default of 128 bits is for Pentium III's SSE __m128. We can't
2633 change it because of optimize_size. Otherwise, we can't mix object
2634 files compiled with -Os and -On. */
2635 ix86_preferred_stack_boundary = 128;
2636 if (ix86_preferred_stack_boundary_string)
2638 i = atoi (ix86_preferred_stack_boundary_string);
2639 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641 TARGET_64BIT ? 4 : 2);
2643 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2646 /* Accept -msseregparm only if at least SSE support is enabled. */
2647 if (TARGET_SSEREGPARM
2649 error ("-msseregparm used without SSE enabled");
2651 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2652 if (ix86_fpmath_string != 0)
2654 if (! strcmp (ix86_fpmath_string, "387"))
2655 ix86_fpmath = FPMATH_387;
2656 else if (! strcmp (ix86_fpmath_string, "sse"))
2660 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2661 ix86_fpmath = FPMATH_387;
2664 ix86_fpmath = FPMATH_SSE;
2666 else if (! strcmp (ix86_fpmath_string, "387,sse")
2667 || ! strcmp (ix86_fpmath_string, "sse,387"))
2671 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2672 ix86_fpmath = FPMATH_387;
2674 else if (!TARGET_80387)
2676 warning (0, "387 instruction set disabled, using SSE arithmetics");
2677 ix86_fpmath = FPMATH_SSE;
2680 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2683 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2686 /* If the i387 is disabled, then do not return values in it. */
2688 target_flags &= ~MASK_FLOAT_RETURNS;
2690 /* Use external vectorized library in vectorizing intrinsics. */
2691 if (ix86_veclibabi_string)
2693 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694 ix86_veclib_handler = ix86_veclibabi_svml;
2695 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2696 ix86_veclib_handler = ix86_veclibabi_acml;
2698 error ("unknown vectorization library ABI type (%s) for "
2699 "-mveclibabi= switch", ix86_veclibabi_string);
2702 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2703 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2705 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2707 /* ??? Unwind info is not correct around the CFG unless either a frame
2708 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2709 unwind info generation to be aware of the CFG and propagating states
2711 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712 || flag_exceptions || flag_non_call_exceptions)
2713 && flag_omit_frame_pointer
2714 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2716 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 warning (0, "unwind tables currently require either a frame pointer "
2718 "or -maccumulate-outgoing-args for correctness");
2719 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2722 /* If stack probes are required, the space used for large function
2723 arguments on the stack must also be probed, so enable
2724 -maccumulate-outgoing-args so this happens in the prologue. */
2725 if (TARGET_STACK_PROBE
2726 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2728 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729 warning (0, "stack probing requires -maccumulate-outgoing-args "
2731 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2734 /* For sane SSE instruction set generation we need fcomi instruction.
2735 It is safe to enable all CMOVE instructions. */
2739 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2742 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743 p = strchr (internal_label_prefix, 'X');
2744 internal_label_prefix_len = p - internal_label_prefix;
2748 /* When scheduling description is not available, disable scheduler pass
2749 so it won't slow down the compilation and make x87 code slower. */
2750 if (!TARGET_SCHEDULE)
2751 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2753 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754 set_param_value ("simultaneous-prefetches",
2755 ix86_cost->simultaneous_prefetches);
2756 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2758 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2763 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764 can be optimized to ap = __builtin_next_arg (0). */
2765 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2766 targetm.expand_builtin_va_start = NULL;
2769 /* Return true if this goes in large data/bss. */
2772 ix86_in_large_data_p (tree exp)
2774 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2777 /* Functions are never large data. */
2778 if (TREE_CODE (exp) == FUNCTION_DECL)
2781 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2783 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2784 if (strcmp (section, ".ldata") == 0
2785 || strcmp (section, ".lbss") == 0)
2791 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2793 /* If this is an incomplete type with size 0, then we can't put it
2794 in data because it might be too big when completed. */
2795 if (!size || size > ix86_section_threshold)
2802 /* Switch to the appropriate section for output of DECL.
2803 DECL is either a `VAR_DECL' node or a constant of some sort.
2804 RELOC indicates whether forming the initial value of DECL requires
2805 link-time relocations. */
2807 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2811 x86_64_elf_select_section (tree decl, int reloc,
2812 unsigned HOST_WIDE_INT align)
2814 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2815 && ix86_in_large_data_p (decl))
2817 const char *sname = NULL;
2818 unsigned int flags = SECTION_WRITE;
2819 switch (categorize_decl_for_section (decl, reloc))
2824 case SECCAT_DATA_REL:
2825 sname = ".ldata.rel";
2827 case SECCAT_DATA_REL_LOCAL:
2828 sname = ".ldata.rel.local";
2830 case SECCAT_DATA_REL_RO:
2831 sname = ".ldata.rel.ro";
2833 case SECCAT_DATA_REL_RO_LOCAL:
2834 sname = ".ldata.rel.ro.local";
2838 flags |= SECTION_BSS;
2841 case SECCAT_RODATA_MERGE_STR:
2842 case SECCAT_RODATA_MERGE_STR_INIT:
2843 case SECCAT_RODATA_MERGE_CONST:
2847 case SECCAT_SRODATA:
2854 /* We don't split these for medium model. Place them into
2855 default sections and hope for best. */
2857 case SECCAT_EMUTLS_VAR:
2858 case SECCAT_EMUTLS_TMPL:
2863 /* We might get called with string constants, but get_named_section
2864 doesn't like them as they are not DECLs. Also, we need to set
2865 flags in that case. */
2867 return get_section (sname, flags, NULL);
2868 return get_named_section (decl, sname, reloc);
2871 return default_elf_select_section (decl, reloc, align);
2874 /* Build up a unique section name, expressed as a
2875 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2876 RELOC indicates whether the initial value of EXP requires
2877 link-time relocations. */
2879 static void ATTRIBUTE_UNUSED
2880 x86_64_elf_unique_section (tree decl, int reloc)
2882 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2883 && ix86_in_large_data_p (decl))
2885 const char *prefix = NULL;
2886 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2887 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2889 switch (categorize_decl_for_section (decl, reloc))
2892 case SECCAT_DATA_REL:
2893 case SECCAT_DATA_REL_LOCAL:
2894 case SECCAT_DATA_REL_RO:
2895 case SECCAT_DATA_REL_RO_LOCAL:
2896 prefix = one_only ? ".ld" : ".ldata";
2899 prefix = one_only ? ".lb" : ".lbss";
2902 case SECCAT_RODATA_MERGE_STR:
2903 case SECCAT_RODATA_MERGE_STR_INIT:
2904 case SECCAT_RODATA_MERGE_CONST:
2905 prefix = one_only ? ".lr" : ".lrodata";
2907 case SECCAT_SRODATA:
2914 /* We don't split these for medium model. Place them into
2915 default sections and hope for best. */
2917 case SECCAT_EMUTLS_VAR:
2918 prefix = targetm.emutls.var_section;
2920 case SECCAT_EMUTLS_TMPL:
2921 prefix = targetm.emutls.tmpl_section;
2926 const char *name, *linkonce;
2929 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2930 name = targetm.strip_name_encoding (name);
2932 /* If we're using one_only, then there needs to be a .gnu.linkonce
2933 prefix to the section name. */
2934 linkonce = one_only ? ".gnu.linkonce" : "";
2936 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2938 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2942 default_unique_section (decl, reloc);
2945 #ifdef COMMON_ASM_OP
2946 /* This says how to output assembler code to declare an
2947 uninitialized external linkage data object.
2949 For medium model x86-64 we need to use .largecomm opcode for
2952 x86_elf_aligned_common (FILE *file,
2953 const char *name, unsigned HOST_WIDE_INT size,
2956 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2957 && size > (unsigned int)ix86_section_threshold)
2958 fprintf (file, ".largecomm\t");
2960 fprintf (file, "%s", COMMON_ASM_OP);
2961 assemble_name (file, name);
2962 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2963 size, align / BITS_PER_UNIT);
2967 /* Utility function for targets to use in implementing
2968 ASM_OUTPUT_ALIGNED_BSS. */
2971 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2972 const char *name, unsigned HOST_WIDE_INT size,
2975 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2976 && size > (unsigned int)ix86_section_threshold)
2977 switch_to_section (get_named_section (decl, ".lbss", 0));
2979 switch_to_section (bss_section);
2980 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2981 #ifdef ASM_DECLARE_OBJECT_NAME
2982 last_assemble_variable_decl = decl;
2983 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2985 /* Standard thing is just output label for the object. */
2986 ASM_OUTPUT_LABEL (file, name);
2987 #endif /* ASM_DECLARE_OBJECT_NAME */
2988 ASM_OUTPUT_SKIP (file, size ? size : 1);
2992 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2994 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2995 make the problem with not enough registers even worse. */
2996 #ifdef INSN_SCHEDULING
2998 flag_schedule_insns = 0;
3002 /* The Darwin libraries never set errno, so we might as well
3003 avoid calling them when that's the only reason we would. */
3004 flag_errno_math = 0;
3006 /* The default values of these switches depend on the TARGET_64BIT
3007 that is not known at this moment. Mark these values with 2 and
3008 let user the to override these. In case there is no command line option
3009 specifying them, we will set the defaults in override_options. */
3011 flag_omit_frame_pointer = 2;
3012 flag_pcc_struct_return = 2;
3013 flag_asynchronous_unwind_tables = 2;
3014 flag_vect_cost_model = 1;
3015 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3016 SUBTARGET_OPTIMIZATION_OPTIONS;
3020 /* Decide whether we can make a sibling call to a function. DECL is the
3021 declaration of the function being targeted by the call and EXP is the
3022 CALL_EXPR representing the call. */
3025 ix86_function_ok_for_sibcall (tree decl, tree exp)
3030 /* If we are generating position-independent code, we cannot sibcall
3031 optimize any indirect call, or a direct call to a global function,
3032 as the PLT requires %ebx be live. */
3033 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3040 func = TREE_TYPE (CALL_EXPR_FN (exp));
3041 if (POINTER_TYPE_P (func))
3042 func = TREE_TYPE (func);
3045 /* Check that the return value locations are the same. Like
3046 if we are returning floats on the 80387 register stack, we cannot
3047 make a sibcall from a function that doesn't return a float to a
3048 function that does or, conversely, from a function that does return
3049 a float to a function that doesn't; the necessary stack adjustment
3050 would not be executed. This is also the place we notice
3051 differences in the return value ABI. Note that it is ok for one
3052 of the functions to have void return type as long as the return
3053 value of the other is passed in a register. */
3054 a = ix86_function_value (TREE_TYPE (exp), func, false);
3055 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3057 if (STACK_REG_P (a) || STACK_REG_P (b))
3059 if (!rtx_equal_p (a, b))
3062 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3064 else if (!rtx_equal_p (a, b))
3067 /* If this call is indirect, we'll need to be able to use a call-clobbered
3068 register for the address of the target function. Make sure that all
3069 such registers are not used for passing parameters. */
3070 if (!decl && !TARGET_64BIT)
3074 /* We're looking at the CALL_EXPR, we need the type of the function. */
3075 type = CALL_EXPR_FN (exp); /* pointer expression */
3076 type = TREE_TYPE (type); /* pointer type */
3077 type = TREE_TYPE (type); /* function type */
3079 if (ix86_function_regparm (type, NULL) >= 3)
3081 /* ??? Need to count the actual number of registers to be used,
3082 not the possible number of registers. Fix later. */
3087 /* Dllimport'd functions are also called indirectly. */
3088 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3089 && decl && DECL_DLLIMPORT_P (decl)
3090 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3093 /* If we forced aligned the stack, then sibcalling would unalign the
3094 stack, which may break the called function. */
3095 if (cfun->machine->force_align_arg_pointer)
3098 /* Otherwise okay. That also includes certain types of indirect calls. */
3102 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3103 calling convention attributes;
3104 arguments as in struct attribute_spec.handler. */
3107 ix86_handle_cconv_attribute (tree *node, tree name,
3109 int flags ATTRIBUTE_UNUSED,
3112 if (TREE_CODE (*node) != FUNCTION_TYPE
3113 && TREE_CODE (*node) != METHOD_TYPE
3114 && TREE_CODE (*node) != FIELD_DECL
3115 && TREE_CODE (*node) != TYPE_DECL)
3117 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3118 IDENTIFIER_POINTER (name));
3119 *no_add_attrs = true;
3123 /* Can combine regparm with all attributes but fastcall. */
3124 if (is_attribute_p ("regparm", name))
3128 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3130 error ("fastcall and regparm attributes are not compatible");
3133 cst = TREE_VALUE (args);
3134 if (TREE_CODE (cst) != INTEGER_CST)
3136 warning (OPT_Wattributes,
3137 "%qs attribute requires an integer constant argument",
3138 IDENTIFIER_POINTER (name));
3139 *no_add_attrs = true;
3141 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3143 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3144 IDENTIFIER_POINTER (name), REGPARM_MAX);
3145 *no_add_attrs = true;
3149 && lookup_attribute (ix86_force_align_arg_pointer_string,
3150 TYPE_ATTRIBUTES (*node))
3151 && compare_tree_int (cst, REGPARM_MAX-1))
3153 error ("%s functions limited to %d register parameters",
3154 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3162 /* Do not warn when emulating the MS ABI. */
3163 if (!TARGET_64BIT_MS_ABI)
3164 warning (OPT_Wattributes, "%qs attribute ignored",
3165 IDENTIFIER_POINTER (name));
3166 *no_add_attrs = true;
3170 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3171 if (is_attribute_p ("fastcall", name))
3173 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3175 error ("fastcall and cdecl attributes are not compatible");
3177 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3179 error ("fastcall and stdcall attributes are not compatible");
3181 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3183 error ("fastcall and regparm attributes are not compatible");
3187 /* Can combine stdcall with fastcall (redundant), regparm and
3189 else if (is_attribute_p ("stdcall", name))
3191 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3193 error ("stdcall and cdecl attributes are not compatible");
3195 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3197 error ("stdcall and fastcall attributes are not compatible");
3201 /* Can combine cdecl with regparm and sseregparm. */
3202 else if (is_attribute_p ("cdecl", name))
3204 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3206 error ("stdcall and cdecl attributes are not compatible");
3208 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3210 error ("fastcall and cdecl attributes are not compatible");
3214 /* Can combine sseregparm with all attributes. */
3219 /* Return 0 if the attributes for two types are incompatible, 1 if they
3220 are compatible, and 2 if they are nearly compatible (which causes a
3221 warning to be generated). */
3224 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3226 /* Check for mismatch of non-default calling convention. */
3227 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3229 if (TREE_CODE (type1) != FUNCTION_TYPE
3230 && TREE_CODE (type1) != METHOD_TYPE)
3233 /* Check for mismatched fastcall/regparm types. */
3234 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3235 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3236 || (ix86_function_regparm (type1, NULL)
3237 != ix86_function_regparm (type2, NULL)))
3240 /* Check for mismatched sseregparm types. */
3241 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3242 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3245 /* Check for mismatched return types (cdecl vs stdcall). */
3246 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3247 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3253 /* Return the regparm value for a function with the indicated TYPE and DECL.
3254 DECL may be NULL when calling function indirectly
3255 or considering a libcall. */
3258 ix86_function_regparm (const_tree type, const_tree decl)
3261 int regparm = ix86_regparm;
3263 static bool error_issued;
3268 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3272 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3274 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3276 /* We can't use regparm(3) for nested functions because
3277 these pass static chain pointer in %ecx register. */
3278 if (!error_issued && regparm == 3
3279 && decl_function_context (decl)
3280 && !DECL_NO_STATIC_CHAIN (decl))
3282 error ("nested functions are limited to 2 register parameters");
3283 error_issued = true;
3291 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3294 /* Use register calling convention for local functions when possible. */
3295 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3296 && flag_unit_at_a_time && !profile_flag)
3298 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3299 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3302 int local_regparm, globals = 0, regno;
3305 /* Make sure no regparm register is taken by a
3306 fixed register variable. */
3307 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3308 if (fixed_regs[local_regparm])
3311 /* We can't use regparm(3) for nested functions as these use
3312 static chain pointer in third argument. */
3313 if (local_regparm == 3
3314 && (decl_function_context (decl)
3315 || ix86_force_align_arg_pointer)
3316 && !DECL_NO_STATIC_CHAIN (decl))
3319 /* If the function realigns its stackpointer, the prologue will
3320 clobber %ecx. If we've already generated code for the callee,
3321 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3322 scanning the attributes for the self-realigning property. */
3323 f = DECL_STRUCT_FUNCTION (decl);
3324 if (local_regparm == 3
3325 && (f ? !!f->machine->force_align_arg_pointer
3326 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3327 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3330 /* Each fixed register usage increases register pressure,
3331 so less registers should be used for argument passing.
3332 This functionality can be overriden by an explicit
3334 for (regno = 0; regno <= DI_REG; regno++)
3335 if (fixed_regs[regno])
3339 = globals < local_regparm ? local_regparm - globals : 0;
3341 if (local_regparm > regparm)
3342 regparm = local_regparm;
3349 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3350 DFmode (2) arguments in SSE registers for a function with the
3351 indicated TYPE and DECL. DECL may be NULL when calling function
3352 indirectly or considering a libcall. Otherwise return 0. */
3355 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3357 gcc_assert (!TARGET_64BIT);
3359 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3360 by the sseregparm attribute. */
3361 if (TARGET_SSEREGPARM
3362 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3369 error ("Calling %qD with attribute sseregparm without "
3370 "SSE/SSE2 enabled", decl);
3372 error ("Calling %qT with attribute sseregparm without "
3373 "SSE/SSE2 enabled", type);
3381 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3382 (and DFmode for SSE2) arguments in SSE registers. */
3383 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3385 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3386 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3388 return TARGET_SSE2 ? 2 : 1;
3394 /* Return true if EAX is live at the start of the function. Used by
3395 ix86_expand_prologue to determine if we need special help before
3396 calling allocate_stack_worker. */
3399 ix86_eax_live_at_start_p (void)
3401 /* Cheat. Don't bother working forward from ix86_function_regparm
3402 to the function type to whether an actual argument is located in
3403 eax. Instead just look at cfg info, which is still close enough
3404 to correct at this point. This gives false positives for broken
3405 functions that might use uninitialized data that happens to be
3406 allocated in eax, but who cares? */
3407 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3410 /* Value is the number of bytes of arguments automatically
3411 popped when returning from a subroutine call.
3412 FUNDECL is the declaration node of the function (as a tree),
3413 FUNTYPE is the data type of the function (as a tree),
3414 or for a library call it is an identifier node for the subroutine name.
3415 SIZE is the number of bytes of arguments passed on the stack.
3417 On the 80386, the RTD insn may be used to pop them if the number
3418 of args is fixed, but if the number is variable then the caller
3419 must pop them all. RTD can't be used for library calls now
3420 because the library is compiled with the Unix compiler.
3421 Use of RTD is a selectable option, since it is incompatible with
3422 standard Unix calling sequences. If the option is not selected,
3423 the caller must always pop the args.
3425 The attribute stdcall is equivalent to RTD on a per module basis. */
3428 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3432 /* None of the 64-bit ABIs pop arguments. */
3436 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3438 /* Cdecl functions override -mrtd, and never pop the stack. */
3439 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3441 /* Stdcall and fastcall functions will pop the stack if not
3443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3444 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3447 if (rtd && ! stdarg_p (funtype))
3451 /* Lose any fake structure return argument if it is passed on the stack. */
3452 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3453 && !KEEP_AGGREGATE_RETURN_POINTER)
3455 int nregs = ix86_function_regparm (funtype, fundecl);
3457 return GET_MODE_SIZE (Pmode);
3463 /* Argument support functions. */
3465 /* Return true when register may be used to pass function parameters. */
3467 ix86_function_arg_regno_p (int regno)
3470 const int *parm_regs;
3475 return (regno < REGPARM_MAX
3476 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3478 return (regno < REGPARM_MAX
3479 || (TARGET_MMX && MMX_REGNO_P (regno)
3480 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3481 || (TARGET_SSE && SSE_REGNO_P (regno)
3482 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3487 if (SSE_REGNO_P (regno) && TARGET_SSE)
3492 if (TARGET_SSE && SSE_REGNO_P (regno)
3493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3497 /* RAX is used as hidden argument to va_arg functions. */
3498 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3501 if (TARGET_64BIT_MS_ABI)
3502 parm_regs = x86_64_ms_abi_int_parameter_registers;
3504 parm_regs = x86_64_int_parameter_registers;
3505 for (i = 0; i < REGPARM_MAX; i++)
3506 if (regno == parm_regs[i])
3511 /* Return if we do not know how to pass TYPE solely in registers. */
3514 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3516 if (must_pass_in_stack_var_size_or_pad (mode, type))
3519 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3520 The layout_type routine is crafty and tries to trick us into passing
3521 currently unsupported vector types on the stack by using TImode. */
3522 return (!TARGET_64BIT && mode == TImode
3523 && type && TREE_CODE (type) != VECTOR_TYPE);
3526 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3527 for a call to a function whose data type is FNTYPE.
3528 For a library call, FNTYPE is 0. */
3531 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3532 tree fntype, /* tree ptr for function decl */
3533 rtx libname, /* SYMBOL_REF of library name or 0 */
3536 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3537 memset (cum, 0, sizeof (*cum));
3539 /* Set up the number of registers to use for passing arguments. */
3540 cum->nregs = ix86_regparm;
3542 cum->sse_nregs = SSE_REGPARM_MAX;
3544 cum->mmx_nregs = MMX_REGPARM_MAX;
3545 cum->warn_sse = true;
3546 cum->warn_mmx = true;
3548 /* Because type might mismatch in between caller and callee, we need to
3549 use actual type of function for local calls.
3550 FIXME: cgraph_analyze can be told to actually record if function uses
3551 va_start so for local functions maybe_vaarg can be made aggressive
3553 FIXME: once typesytem is fixed, we won't need this code anymore. */
3555 fntype = TREE_TYPE (fndecl);
3556 cum->maybe_vaarg = (fntype
3557 ? (!prototype_p (fntype) || stdarg_p (fntype))
3562 /* If there are variable arguments, then we won't pass anything
3563 in registers in 32-bit mode. */
3564 if (stdarg_p (fntype))
3574 /* Use ecx and edx registers if function has fastcall attribute,
3575 else look for regparm information. */
3578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3584 cum->nregs = ix86_function_regparm (fntype, fndecl);
3587 /* Set up the number of SSE registers used for passing SFmode
3588 and DFmode arguments. Warn for mismatching ABI. */
3589 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3593 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3594 But in the case of vector types, it is some vector mode.
3596 When we have only some of our vector isa extensions enabled, then there
3597 are some modes for which vector_mode_supported_p is false. For these
3598 modes, the generic vector support in gcc will choose some non-vector mode
3599 in order to implement the type. By computing the natural mode, we'll
3600 select the proper ABI location for the operand and not depend on whatever
3601 the middle-end decides to do with these vector types. */
3603 static enum machine_mode
3604 type_natural_mode (const_tree type)
3606 enum machine_mode mode = TYPE_MODE (type);
3608 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3610 HOST_WIDE_INT size = int_size_in_bytes (type);
3611 if ((size == 8 || size == 16)
3612 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3613 && TYPE_VECTOR_SUBPARTS (type) > 1)
3615 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3617 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3618 mode = MIN_MODE_VECTOR_FLOAT;
3620 mode = MIN_MODE_VECTOR_INT;
3622 /* Get the mode which has this inner mode and number of units. */
3623 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3624 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3625 && GET_MODE_INNER (mode) == innermode)
3635 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3636 this may not agree with the mode that the type system has chosen for the
3637 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3638 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3641 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3646 if (orig_mode != BLKmode)
3647 tmp = gen_rtx_REG (orig_mode, regno);
3650 tmp = gen_rtx_REG (mode, regno);
3651 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3652 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3658 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3659 of this code is to classify each 8bytes of incoming argument by the register
3660 class and assign registers accordingly. */
3662 /* Return the union class of CLASS1 and CLASS2.
3663 See the x86-64 PS ABI for details. */
3665 static enum x86_64_reg_class
3666 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3668 /* Rule #1: If both classes are equal, this is the resulting class. */
3669 if (class1 == class2)
3672 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3674 if (class1 == X86_64_NO_CLASS)
3676 if (class2 == X86_64_NO_CLASS)
3679 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3680 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3681 return X86_64_MEMORY_CLASS;
3683 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3684 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3685 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3686 return X86_64_INTEGERSI_CLASS;
3687 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3688 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3689 return X86_64_INTEGER_CLASS;
3691 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3693 if (class1 == X86_64_X87_CLASS
3694 || class1 == X86_64_X87UP_CLASS
3695 || class1 == X86_64_COMPLEX_X87_CLASS
3696 || class2 == X86_64_X87_CLASS
3697 || class2 == X86_64_X87UP_CLASS
3698 || class2 == X86_64_COMPLEX_X87_CLASS)
3699 return X86_64_MEMORY_CLASS;
3701 /* Rule #6: Otherwise class SSE is used. */
3702 return X86_64_SSE_CLASS;
3705 /* Classify the argument of type TYPE and mode MODE.
3706 CLASSES will be filled by the register class used to pass each word
3707 of the operand. The number of words is returned. In case the parameter
3708 should be passed in memory, 0 is returned. As a special case for zero
3709 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3711 BIT_OFFSET is used internally for handling records and specifies offset
3712 of the offset in bits modulo 256 to avoid overflow cases.
3714 See the x86-64 PS ABI for details.
3718 classify_argument (enum machine_mode mode, const_tree type,
3719 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3721 HOST_WIDE_INT bytes =
3722 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3723 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3725 /* Variable sized entities are always passed/returned in memory. */
3729 if (mode != VOIDmode
3730 && targetm.calls.must_pass_in_stack (mode, type))
3733 if (type && AGGREGATE_TYPE_P (type))
3737 enum x86_64_reg_class subclasses[MAX_CLASSES];
3739 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3743 for (i = 0; i < words; i++)
3744 classes[i] = X86_64_NO_CLASS;
3746 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3747 signalize memory class, so handle it as special case. */
3750 classes[0] = X86_64_NO_CLASS;
3754 /* Classify each field of record and merge classes. */
3755 switch (TREE_CODE (type))
3758 /* And now merge the fields of structure. */
3759 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3761 if (TREE_CODE (field) == FIELD_DECL)
3765 if (TREE_TYPE (field) == error_mark_node)
3768 /* Bitfields are always classified as integer. Handle them
3769 early, since later code would consider them to be
3770 misaligned integers. */
3771 if (DECL_BIT_FIELD (field))
3773 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3774 i < ((int_bit_position (field) + (bit_offset % 64))
3775 + tree_low_cst (DECL_SIZE (field), 0)
3778 merge_classes (X86_64_INTEGER_CLASS,
3783 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3784 TREE_TYPE (field), subclasses,
3785 (int_bit_position (field)
3786 + bit_offset) % 256);
3789 for (i = 0; i < num; i++)
3792 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3794 merge_classes (subclasses[i], classes[i + pos]);
3802 /* Arrays are handled as small records. */
3805 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3806 TREE_TYPE (type), subclasses, bit_offset);
3810 /* The partial classes are now full classes. */
3811 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3812 subclasses[0] = X86_64_SSE_CLASS;
3813 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3814 subclasses[0] = X86_64_INTEGER_CLASS;
3816 for (i = 0; i < words; i++)
3817 classes[i] = subclasses[i % num];
3822 case QUAL_UNION_TYPE:
3823 /* Unions are similar to RECORD_TYPE but offset is always 0.
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3827 if (TREE_CODE (field) == FIELD_DECL)
3831 if (TREE_TYPE (field) == error_mark_node)
3834 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3835 TREE_TYPE (field), subclasses,
3839 for (i = 0; i < num; i++)
3840 classes[i] = merge_classes (subclasses[i], classes[i]);
3849 /* Final merger cleanup. */
3850 for (i = 0; i < words; i++)
3852 /* If one class is MEMORY, everything should be passed in
3854 if (classes[i] == X86_64_MEMORY_CLASS)
3857 /* The X86_64_SSEUP_CLASS should be always preceded by
3858 X86_64_SSE_CLASS. */
3859 if (classes[i] == X86_64_SSEUP_CLASS
3860 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3861 classes[i] = X86_64_SSE_CLASS;
3863 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3864 if (classes[i] == X86_64_X87UP_CLASS
3865 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3866 classes[i] = X86_64_SSE_CLASS;
3871 /* Compute alignment needed. We align all types to natural boundaries with
3872 exception of XFmode that is aligned to 64bits. */
3873 if (mode != VOIDmode && mode != BLKmode)
3875 int mode_alignment = GET_MODE_BITSIZE (mode);
3878 mode_alignment = 128;
3879 else if (mode == XCmode)
3880 mode_alignment = 256;
3881 if (COMPLEX_MODE_P (mode))
3882 mode_alignment /= 2;
3883 /* Misaligned fields are always returned in memory. */
3884 if (bit_offset % mode_alignment)
3888 /* for V1xx modes, just use the base mode */
3889 if (VECTOR_MODE_P (mode) && mode != V1DImode
3890 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3891 mode = GET_MODE_INNER (mode);
3893 /* Classification of atomic types. */
3898 classes[0] = X86_64_SSE_CLASS;
3901 classes[0] = X86_64_SSE_CLASS;
3902 classes[1] = X86_64_SSEUP_CLASS;
3911 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3912 classes[0] = X86_64_INTEGERSI_CLASS;
3914 classes[0] = X86_64_INTEGER_CLASS;
3918 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3923 if (!(bit_offset % 64))
3924 classes[0] = X86_64_SSESF_CLASS;
3926 classes[0] = X86_64_SSE_CLASS;
3929 classes[0] = X86_64_SSEDF_CLASS;
3932 classes[0] = X86_64_X87_CLASS;
3933 classes[1] = X86_64_X87UP_CLASS;
3936 classes[0] = X86_64_SSE_CLASS;
3937 classes[1] = X86_64_SSEUP_CLASS;
3940 classes[0] = X86_64_SSE_CLASS;
3943 classes[0] = X86_64_SSEDF_CLASS;
3944 classes[1] = X86_64_SSEDF_CLASS;
3947 classes[0] = X86_64_COMPLEX_X87_CLASS;
3950 /* This modes is larger than 16 bytes. */
3958 classes[0] = X86_64_SSE_CLASS;
3959 classes[1] = X86_64_SSEUP_CLASS;
3966 classes[0] = X86_64_SSE_CLASS;
3972 gcc_assert (VECTOR_MODE_P (mode));
3977 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3979 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3980 classes[0] = X86_64_INTEGERSI_CLASS;
3982 classes[0] = X86_64_INTEGER_CLASS;
3983 classes[1] = X86_64_INTEGER_CLASS;
3984 return 1 + (bytes > 8);
3988 /* Examine the argument and return set number of register required in each
3989 class. Return 0 iff parameter should be passed in memory. */
3991 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3992 int *int_nregs, int *sse_nregs)
3994 enum x86_64_reg_class regclass[MAX_CLASSES];
3995 int n = classify_argument (mode, type, regclass, 0);
4001 for (n--; n >= 0; n--)
4002 switch (regclass[n])
4004 case X86_64_INTEGER_CLASS:
4005 case X86_64_INTEGERSI_CLASS:
4008 case X86_64_SSE_CLASS:
4009 case X86_64_SSESF_CLASS:
4010 case X86_64_SSEDF_CLASS:
4013 case X86_64_NO_CLASS:
4014 case X86_64_SSEUP_CLASS:
4016 case X86_64_X87_CLASS:
4017 case X86_64_X87UP_CLASS:
4021 case X86_64_COMPLEX_X87_CLASS:
4022 return in_return ? 2 : 0;
4023 case X86_64_MEMORY_CLASS:
4029 /* Construct container for the argument used by GCC interface. See
4030 FUNCTION_ARG for the detailed description. */
4033 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4034 const_tree type, int in_return, int nintregs, int nsseregs,
4035 const int *intreg, int sse_regno)
4037 /* The following variables hold the static issued_error state. */
4038 static bool issued_sse_arg_error;
4039 static bool issued_sse_ret_error;
4040 static bool issued_x87_ret_error;
4042 enum machine_mode tmpmode;
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4045 enum x86_64_reg_class regclass[MAX_CLASSES];
4049 int needed_sseregs, needed_intregs;
4050 rtx exp[MAX_CLASSES];
4053 n = classify_argument (mode, type, regclass, 0);
4056 if (!examine_argument (mode, type, in_return, &needed_intregs,
4059 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4062 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4063 some less clueful developer tries to use floating-point anyway. */
4064 if (needed_sseregs && !TARGET_SSE)
4068 if (!issued_sse_ret_error)
4070 error ("SSE register return with SSE disabled");
4071 issued_sse_ret_error = true;
4074 else if (!issued_sse_arg_error)
4076 error ("SSE register argument with SSE disabled");
4077 issued_sse_arg_error = true;
4082 /* Likewise, error if the ABI requires us to return values in the
4083 x87 registers and the user specified -mno-80387. */
4084 if (!TARGET_80387 && in_return)
4085 for (i = 0; i < n; i++)
4086 if (regclass[i] == X86_64_X87_CLASS
4087 || regclass[i] == X86_64_X87UP_CLASS
4088 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4090 if (!issued_x87_ret_error)
4092 error ("x87 register return with x87 disabled");
4093 issued_x87_ret_error = true;
4098 /* First construct simple cases. Avoid SCmode, since we want to use
4099 single register to pass this type. */
4100 if (n == 1 && mode != SCmode)
4101 switch (regclass[0])
4103 case X86_64_INTEGER_CLASS:
4104 case X86_64_INTEGERSI_CLASS:
4105 return gen_rtx_REG (mode, intreg[0]);
4106 case X86_64_SSE_CLASS:
4107 case X86_64_SSESF_CLASS:
4108 case X86_64_SSEDF_CLASS:
4109 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4110 case X86_64_X87_CLASS:
4111 case X86_64_COMPLEX_X87_CLASS:
4112 return gen_rtx_REG (mode, FIRST_STACK_REG);
4113 case X86_64_NO_CLASS:
4114 /* Zero sized array, struct or class. */
4119 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4120 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4121 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4124 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4125 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4126 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4127 && regclass[1] == X86_64_INTEGER_CLASS
4128 && (mode == CDImode || mode == TImode || mode == TFmode)
4129 && intreg[0] + 1 == intreg[1])
4130 return gen_rtx_REG (mode, intreg[0]);
4132 /* Otherwise figure out the entries of the PARALLEL. */
4133 for (i = 0; i < n; i++)
4135 switch (regclass[i])
4137 case X86_64_NO_CLASS:
4139 case X86_64_INTEGER_CLASS:
4140 case X86_64_INTEGERSI_CLASS:
4141 /* Merge TImodes on aligned occasions here too. */
4142 if (i * 8 + 8 > bytes)
4143 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4144 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4148 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4149 if (tmpmode == BLKmode)
4151 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4152 gen_rtx_REG (tmpmode, *intreg),
4156 case X86_64_SSESF_CLASS:
4157 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4158 gen_rtx_REG (SFmode,
4159 SSE_REGNO (sse_regno)),
4163 case X86_64_SSEDF_CLASS:
4164 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4165 gen_rtx_REG (DFmode,
4166 SSE_REGNO (sse_regno)),
4170 case X86_64_SSE_CLASS:
4171 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4175 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4176 gen_rtx_REG (tmpmode,
4177 SSE_REGNO (sse_regno)),
4179 if (tmpmode == TImode)
4188 /* Empty aligned struct, union or class. */
4192 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4193 for (i = 0; i < nexps; i++)
4194 XVECEXP (ret, 0, i) = exp [i];
4198 /* Update the data in CUM to advance over an argument of mode MODE
4199 and data type TYPE. (TYPE is null for libcalls where that information
4200 may not be available.) */
4203 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4204 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4220 cum->words += words;
4221 cum->nregs -= words;
4222 cum->regno += words;
4224 if (cum->nregs <= 0)
4232 if (cum->float_in_sse < 2)
4235 if (cum->float_in_sse < 1)
4246 if (!type || !AGGREGATE_TYPE_P (type))
4248 cum->sse_words += words;
4249 cum->sse_nregs -= 1;
4250 cum->sse_regno += 1;
4251 if (cum->sse_nregs <= 0)
4264 if (!type || !AGGREGATE_TYPE_P (type))
4266 cum->mmx_words += words;
4267 cum->mmx_nregs -= 1;
4268 cum->mmx_regno += 1;
4269 if (cum->mmx_nregs <= 0)
4280 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4281 tree type, HOST_WIDE_INT words)
4283 int int_nregs, sse_nregs;
4285 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4286 cum->words += words;
4287 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4289 cum->nregs -= int_nregs;
4290 cum->sse_nregs -= sse_nregs;
4291 cum->regno += int_nregs;
4292 cum->sse_regno += sse_nregs;
4295 cum->words += words;
4299 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4300 HOST_WIDE_INT words)
4302 /* Otherwise, this should be passed indirect. */
4303 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4305 cum->words += words;
4314 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4315 tree type, int named ATTRIBUTE_UNUSED)
4317 HOST_WIDE_INT bytes, words;
4319 if (mode == BLKmode)
4320 bytes = int_size_in_bytes (type);
4322 bytes = GET_MODE_SIZE (mode);
4323 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4326 mode = type_natural_mode (type);
4328 if (TARGET_64BIT_MS_ABI)
4329 function_arg_advance_ms_64 (cum, bytes, words);
4330 else if (TARGET_64BIT)
4331 function_arg_advance_64 (cum, mode, type, words);
4333 function_arg_advance_32 (cum, mode, type, bytes, words);
4336 /* Define where to put the arguments to a function.
4337 Value is zero to push the argument on the stack,
4338 or a hard register in which to store the argument.
4340 MODE is the argument's machine mode.
4341 TYPE is the data type of the argument (as a tree).
4342 This is null for libcalls where that information may
4344 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4345 the preceding args and about the function being called.
4346 NAMED is nonzero if this argument is a named parameter
4347 (otherwise it is an extra parameter matching an ellipsis). */
4350 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4351 enum machine_mode orig_mode, tree type,
4352 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4354 static bool warnedsse, warnedmmx;
4356 /* Avoid the AL settings for the Unix64 ABI. */
4357 if (mode == VOIDmode)
4373 if (words <= cum->nregs)
4375 int regno = cum->regno;
4377 /* Fastcall allocates the first two DWORD (SImode) or
4378 smaller arguments to ECX and EDX if it isn't an
4384 || (type && AGGREGATE_TYPE_P (type)))
4387 /* ECX not EAX is the first allocated register. */
4388 if (regno == AX_REG)
4391 return gen_rtx_REG (mode, regno);
4396 if (cum->float_in_sse < 2)
4399 if (cum->float_in_sse < 1)
4409 if (!type || !AGGREGATE_TYPE_P (type))
4411 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4414 warning (0, "SSE vector argument without SSE enabled "
4418 return gen_reg_or_parallel (mode, orig_mode,
4419 cum->sse_regno + FIRST_SSE_REG);
4428 if (!type || !AGGREGATE_TYPE_P (type))
4430 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4433 warning (0, "MMX vector argument without MMX enabled "
4437 return gen_reg_or_parallel (mode, orig_mode,
4438 cum->mmx_regno + FIRST_MMX_REG);
4447 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4448 enum machine_mode orig_mode, tree type)
4450 /* Handle a hidden AL argument containing number of registers
4451 for varargs x86-64 functions. */
4452 if (mode == VOIDmode)
4453 return GEN_INT (cum->maybe_vaarg
4454 ? (cum->sse_nregs < 0
4459 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4461 &x86_64_int_parameter_registers [cum->regno],
4466 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4467 enum machine_mode orig_mode, int named,
4468 HOST_WIDE_INT bytes)
4472 /* Avoid the AL settings for the Unix64 ABI. */
4473 if (mode == VOIDmode)
4476 /* If we've run out of registers, it goes on the stack. */
4477 if (cum->nregs == 0)
4480 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4482 /* Only floating point modes are passed in anything but integer regs. */
4483 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4486 regno = cum->regno + FIRST_SSE_REG;
4491 /* Unnamed floating parameters are passed in both the
4492 SSE and integer registers. */
4493 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4494 t2 = gen_rtx_REG (mode, regno);
4495 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4496 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4497 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4500 /* Handle aggregated types passed in register. */
4501 if (orig_mode == BLKmode)
4503 if (bytes > 0 && bytes <= 8)
4504 mode = (bytes > 4 ? DImode : SImode);
4505 if (mode == BLKmode)
4509 return gen_reg_or_parallel (mode, orig_mode, regno);
4513 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4514 tree type, int named)
4516 enum machine_mode mode = omode;
4517 HOST_WIDE_INT bytes, words;
4519 if (mode == BLKmode)
4520 bytes = int_size_in_bytes (type);
4522 bytes = GET_MODE_SIZE (mode);
4523 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4525 /* To simplify the code below, represent vector types with a vector mode
4526 even if MMX/SSE are not active. */
4527 if (type && TREE_CODE (type) == VECTOR_TYPE)
4528 mode = type_natural_mode (type);
4530 if (TARGET_64BIT_MS_ABI)
4531 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4532 else if (TARGET_64BIT)
4533 return function_arg_64 (cum, mode, omode, type);
4535 return function_arg_32 (cum, mode, omode, type, bytes, words);
4538 /* A C expression that indicates when an argument must be passed by
4539 reference. If nonzero for an argument, a copy of that argument is
4540 made in memory and a pointer to the argument is passed instead of
4541 the argument itself. The pointer is passed in whatever way is
4542 appropriate for passing a pointer to that type. */
4545 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4546 enum machine_mode mode ATTRIBUTE_UNUSED,
4547 const_tree type, bool named ATTRIBUTE_UNUSED)
4549 /* See Windows x64 Software Convention. */
4550 if (TARGET_64BIT_MS_ABI)
4552 int msize = (int) GET_MODE_SIZE (mode);
4555 /* Arrays are passed by reference. */
4556 if (TREE_CODE (type) == ARRAY_TYPE)
4559 if (AGGREGATE_TYPE_P (type))
4561 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4562 are passed by reference. */
4563 msize = int_size_in_bytes (type);
4567 /* __m128 is passed by reference. */
4569 case 1: case 2: case 4: case 8:
4575 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4581 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4584 contains_aligned_value_p (tree type)
4586 enum machine_mode mode = TYPE_MODE (type);
4587 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
4588 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4590 if (TYPE_ALIGN (type) < 128)
4593 if (AGGREGATE_TYPE_P (type))
4595 /* Walk the aggregates recursively. */
4596 switch (TREE_CODE (type))
4600 case QUAL_UNION_TYPE:
4604 /* Walk all the structure fields. */
4605 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4607 if (TREE_CODE (field) == FIELD_DECL
4608 && contains_aligned_value_p (TREE_TYPE (field)))
4615 /* Just for use if some languages passes arrays by value. */
4616 if (contains_aligned_value_p (TREE_TYPE (type)))
4627 /* Gives the alignment boundary, in bits, of an argument with the
4628 specified mode and type. */
4631 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4635 align = TYPE_ALIGN (type);
4637 align = GET_MODE_ALIGNMENT (mode);
4638 if (align < PARM_BOUNDARY)
4639 align = PARM_BOUNDARY;
4640 /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
4641 if (!TARGET_64BIT && mode != TDmode)
4643 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4644 make an exception for SSE modes since these require 128bit
4647 The handling here differs from field_alignment. ICC aligns MMX
4648 arguments to 4 byte boundaries, while structure fields are aligned
4649 to 8 byte boundaries. */
4652 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
4653 align = PARM_BOUNDARY;
4657 if (!contains_aligned_value_p (type))
4658 align = PARM_BOUNDARY;
4661 if (align > BIGGEST_ALIGNMENT)
4662 align = BIGGEST_ALIGNMENT;
4666 /* Return true if N is a possible register number of function value. */
4669 ix86_function_value_regno_p (int regno)
4676 case FIRST_FLOAT_REG:
4677 if (TARGET_64BIT_MS_ABI)
4679 return TARGET_FLOAT_RETURNS_IN_80387;
4685 if (TARGET_MACHO || TARGET_64BIT)
4693 /* Define how to find the value returned by a function.
4694 VALTYPE is the data type of the value (as a tree).
4695 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4696 otherwise, FUNC is 0. */
4699 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4700 const_tree fntype, const_tree fn)
4704 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4705 we normally prevent this case when mmx is not available. However
4706 some ABIs may require the result to be returned like DImode. */
4707 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4708 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4710 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4711 we prevent this case when sse is not available. However some ABIs
4712 may require the result to be returned like integer TImode. */
4713 else if (mode == TImode
4714 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4715 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4717 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4718 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4719 regno = FIRST_FLOAT_REG;
4721 /* Most things go in %eax. */
4724 /* Override FP return register with %xmm0 for local functions when
4725 SSE math is enabled or for functions with sseregparm attribute. */
4726 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4728 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4729 if ((sse_level >= 1 && mode == SFmode)
4730 || (sse_level == 2 && mode == DFmode))
4731 regno = FIRST_SSE_REG;
4734 return gen_rtx_REG (orig_mode, regno);
4738 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4743 /* Handle libcalls, which don't provide a type node. */
4744 if (valtype == NULL)
4756 return gen_rtx_REG (mode, FIRST_SSE_REG);
4759 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4763 return gen_rtx_REG (mode, AX_REG);
4767 ret = construct_container (mode, orig_mode, valtype, 1,
4768 REGPARM_MAX, SSE_REGPARM_MAX,
4769 x86_64_int_return_registers, 0);
4771 /* For zero sized structures, construct_container returns NULL, but we
4772 need to keep rest of compiler happy by returning meaningful value. */
4774 ret = gen_rtx_REG (orig_mode, AX_REG);
4780 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4782 unsigned int regno = AX_REG;
4786 switch (GET_MODE_SIZE (mode))
4789 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4790 && !COMPLEX_MODE_P (mode))
4791 regno = FIRST_SSE_REG;
4795 if (mode == SFmode || mode == DFmode)
4796 regno = FIRST_SSE_REG;
4802 return gen_rtx_REG (orig_mode, regno);
4806 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4807 enum machine_mode orig_mode, enum machine_mode mode)
4809 const_tree fn, fntype;
4812 if (fntype_or_decl && DECL_P (fntype_or_decl))
4813 fn = fntype_or_decl;
4814 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4816 if (TARGET_64BIT_MS_ABI)
4817 return function_value_ms_64 (orig_mode, mode);
4818 else if (TARGET_64BIT)
4819 return function_value_64 (orig_mode, mode, valtype);
4821 return function_value_32 (orig_mode, mode, fntype, fn);
4825 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4826 bool outgoing ATTRIBUTE_UNUSED)
4828 enum machine_mode mode, orig_mode;
4830 orig_mode = TYPE_MODE (valtype);
4831 mode = type_natural_mode (valtype);
4832 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4836 ix86_libcall_value (enum machine_mode mode)
4838 return ix86_function_value_1 (NULL, NULL, mode, mode);
4841 /* Return true iff type is returned in memory. */
4844 return_in_memory_32 (const_tree type, enum machine_mode mode)
4848 if (mode == BLKmode)
4851 size = int_size_in_bytes (type);
4853 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4856 if (VECTOR_MODE_P (mode) || mode == TImode)
4858 /* User-created vectors small enough to fit in EAX. */
4862 /* MMX/3dNow values are returned in MM0,
4863 except when it doesn't exits. */
4865 return (TARGET_MMX ? 0 : 1);
4867 /* SSE values are returned in XMM0, except when it doesn't exist. */
4869 return (TARGET_SSE ? 0 : 1);
4884 return_in_memory_64 (const_tree type, enum machine_mode mode)
4886 int needed_intregs, needed_sseregs;
4887 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4891 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4893 HOST_WIDE_INT size = int_size_in_bytes (type);
4895 /* __m128 is returned in xmm0. */
4896 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4897 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4900 /* Otherwise, the size must be exactly in [1248]. */
4901 return (size != 1 && size != 2 && size != 4 && size != 8);
4905 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4907 const enum machine_mode mode = type_natural_mode (type);
4909 if (TARGET_64BIT_MS_ABI)
4910 return return_in_memory_ms_64 (type, mode);
4911 else if (TARGET_64BIT)
4912 return return_in_memory_64 (type, mode);
4914 return return_in_memory_32 (type, mode);
4917 /* Return false iff TYPE is returned in memory. This version is used
4918 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4919 but differs notably in that when MMX is available, 8-byte vectors
4920 are returned in memory, rather than in MMX registers. */
4923 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4926 enum machine_mode mode = type_natural_mode (type);
4929 return return_in_memory_64 (type, mode);
4931 if (mode == BLKmode)
4934 size = int_size_in_bytes (type);
4936 if (VECTOR_MODE_P (mode))
4938 /* Return in memory only if MMX registers *are* available. This
4939 seems backwards, but it is consistent with the existing
4946 else if (mode == TImode)
4948 else if (mode == XFmode)
4955 ix86_i386elf_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4957 return (TYPE_MODE (type) == BLKmode
4958 || (VECTOR_MODE_P (TYPE_MODE (type)) && int_size_in_bytes (type) == 8));
4962 ix86_i386interix_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4964 return (TYPE_MODE (type) == BLKmode
4965 || (AGGREGATE_TYPE_P (type) && int_size_in_bytes(type) > 8 ));
4968 /* When returning SSE vector types, we have a choice of either
4969 (1) being abi incompatible with a -march switch, or
4970 (2) generating an error.
4971 Given no good solution, I think the safest thing is one warning.
4972 The user won't be able to use -Werror, but....
4974 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4975 called in response to actually generating a caller or callee that
4976 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
4977 via aggregate_value_p for general type probing from tree-ssa. */
4980 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4982 static bool warnedsse, warnedmmx;
4984 if (!TARGET_64BIT && type)
4986 /* Look at the return type of the function, not the function type. */
4987 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4989 if (!TARGET_SSE && !warnedsse)
4992 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4995 warning (0, "SSE vector return without SSE enabled "
5000 if (!TARGET_MMX && !warnedmmx)
5002 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5005 warning (0, "MMX vector return without MMX enabled "
5015 /* Create the va_list data type. */
5018 ix86_build_builtin_va_list (void)
5020 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5022 /* For i386 we use plain pointer to argument area. */
5023 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5024 return build_pointer_type (char_type_node);
5026 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5027 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5029 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5030 unsigned_type_node);
5031 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5032 unsigned_type_node);
5033 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5035 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5038 va_list_gpr_counter_field = f_gpr;
5039 va_list_fpr_counter_field = f_fpr;
5041 DECL_FIELD_CONTEXT (f_gpr) = record;
5042 DECL_FIELD_CONTEXT (f_fpr) = record;
5043 DECL_FIELD_CONTEXT (f_ovf) = record;
5044 DECL_FIELD_CONTEXT (f_sav) = record;
5046 TREE_CHAIN (record) = type_decl;
5047 TYPE_NAME (record) = type_decl;
5048 TYPE_FIELDS (record) = f_gpr;
5049 TREE_CHAIN (f_gpr) = f_fpr;
5050 TREE_CHAIN (f_fpr) = f_ovf;
5051 TREE_CHAIN (f_ovf) = f_sav;
5053 layout_type (record);
5055 /* The correct type is an array type of one element. */
5056 return build_array_type (record, build_index_type (size_zero_node));
5059 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5062 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5072 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5075 /* Indicate to allocate space on the stack for varargs save area. */
5076 ix86_save_varrargs_registers = 1;
5077 /* We need 16-byte stack alignment to save SSE registers. If user
5078 asked for lower preferred_stack_boundary, lets just hope that he knows
5079 what he is doing and won't varargs SSE values.
5081 We also may end up assuming that only 64bit values are stored in SSE
5082 register let some floating point program work. */
5083 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5084 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5086 save_area = frame_pointer_rtx;
5087 set = get_varargs_alias_set ();
5089 for (i = cum->regno;
5091 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5094 mem = gen_rtx_MEM (Pmode,
5095 plus_constant (save_area, i * UNITS_PER_WORD));
5096 MEM_NOTRAP_P (mem) = 1;
5097 set_mem_alias_set (mem, set);
5098 emit_move_insn (mem, gen_rtx_REG (Pmode,
5099 x86_64_int_parameter_registers[i]));
5102 if (cum->sse_nregs && cfun->va_list_fpr_size)
5104 /* Now emit code to save SSE registers. The AX parameter contains number
5105 of SSE parameter registers used to call this function. We use
5106 sse_prologue_save insn template that produces computed jump across
5107 SSE saves. We need some preparation work to get this working. */
5109 label = gen_label_rtx ();
5110 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5112 /* Compute address to jump to :
5113 label - 5*eax + nnamed_sse_arguments*5 */
5114 tmp_reg = gen_reg_rtx (Pmode);
5115 nsse_reg = gen_reg_rtx (Pmode);
5116 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5117 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5118 gen_rtx_MULT (Pmode, nsse_reg,
5123 gen_rtx_CONST (DImode,
5124 gen_rtx_PLUS (DImode,
5126 GEN_INT (cum->sse_regno * 4))));
5128 emit_move_insn (nsse_reg, label_ref);
5129 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5131 /* Compute address of memory block we save into. We always use pointer
5132 pointing 127 bytes after first byte to store - this is needed to keep
5133 instruction size limited by 4 bytes. */
5134 tmp_reg = gen_reg_rtx (Pmode);
5135 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5136 plus_constant (save_area,
5137 8 * REGPARM_MAX + 127)));
5138 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5139 MEM_NOTRAP_P (mem) = 1;
5140 set_mem_alias_set (mem, set);
5141 set_mem_align (mem, BITS_PER_WORD);
5143 /* And finally do the dirty job! */
5144 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5145 GEN_INT (cum->sse_regno), label));
5150 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5152 alias_set_type set = get_varargs_alias_set ();
5155 for (i = cum->regno; i < REGPARM_MAX; i++)
5159 mem = gen_rtx_MEM (Pmode,
5160 plus_constant (virtual_incoming_args_rtx,
5161 i * UNITS_PER_WORD));
5162 MEM_NOTRAP_P (mem) = 1;
5163 set_mem_alias_set (mem, set);
5165 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5166 emit_move_insn (mem, reg);
5171 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5172 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5175 CUMULATIVE_ARGS next_cum;
5178 /* This argument doesn't appear to be used anymore. Which is good,
5179 because the old code here didn't suppress rtl generation. */
5180 gcc_assert (!no_rtl);
5185 fntype = TREE_TYPE (current_function_decl);
5187 /* For varargs, we do not want to skip the dummy va_dcl argument.
5188 For stdargs, we do want to skip the last named argument. */
5190 if (stdarg_p (fntype))
5191 function_arg_advance (&next_cum, mode, type, 1);
5193 if (TARGET_64BIT_MS_ABI)
5194 setup_incoming_varargs_ms_64 (&next_cum);
5196 setup_incoming_varargs_64 (&next_cum);
5199 /* Implement va_start. */
5202 ix86_va_start (tree valist, rtx nextarg)
5204 HOST_WIDE_INT words, n_gpr, n_fpr;
5205 tree f_gpr, f_fpr, f_ovf, f_sav;
5206 tree gpr, fpr, ovf, sav, t;
5209 /* Only 64bit target needs something special. */
5210 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5212 std_expand_builtin_va_start (valist, nextarg);
5216 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5217 f_fpr = TREE_CHAIN (f_gpr);
5218 f_ovf = TREE_CHAIN (f_fpr);
5219 f_sav = TREE_CHAIN (f_ovf);
5221 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5222 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5223 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5224 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5225 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5227 /* Count number of gp and fp argument registers used. */
5228 words = crtl->args.info.words;
5229 n_gpr = crtl->args.info.regno;
5230 n_fpr = crtl->args.info.sse_regno;
5232 if (cfun->va_list_gpr_size)
5234 type = TREE_TYPE (gpr);
5235 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5236 build_int_cst (type, n_gpr * 8));
5237 TREE_SIDE_EFFECTS (t) = 1;
5238 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5241 if (cfun->va_list_fpr_size)
5243 type = TREE_TYPE (fpr);
5244 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5245 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5246 TREE_SIDE_EFFECTS (t) = 1;
5247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5250 /* Find the overflow area. */
5251 type = TREE_TYPE (ovf);
5252 t = make_tree (type, virtual_incoming_args_rtx);
5254 t = build2 (POINTER_PLUS_EXPR, type, t,
5255 size_int (words * UNITS_PER_WORD));
5256 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5257 TREE_SIDE_EFFECTS (t) = 1;
5258 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5260 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5262 /* Find the register save area.
5263 Prologue of the function save it right above stack frame. */
5264 type = TREE_TYPE (sav);
5265 t = make_tree (type, frame_pointer_rtx);
5266 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5267 TREE_SIDE_EFFECTS (t) = 1;
5268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5272 /* Implement va_arg. */
5275 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5277 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5278 tree f_gpr, f_fpr, f_ovf, f_sav;
5279 tree gpr, fpr, ovf, sav, t;
5281 tree lab_false, lab_over = NULL_TREE;
5286 enum machine_mode nat_mode;
5288 /* Only 64bit target needs something special. */
5289 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5290 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5292 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5293 f_fpr = TREE_CHAIN (f_gpr);
5294 f_ovf = TREE_CHAIN (f_fpr);
5295 f_sav = TREE_CHAIN (f_ovf);
5297 valist = build_va_arg_indirect_ref (valist);
5298 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5299 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5300 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5301 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5303 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5305 type = build_pointer_type (type);
5306 size = int_size_in_bytes (type);
5307 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5309 nat_mode = type_natural_mode (type);
5310 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5311 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5313 /* Pull the value out of the saved registers. */
5315 addr = create_tmp_var (ptr_type_node, "addr");
5316 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5320 int needed_intregs, needed_sseregs;
5322 tree int_addr, sse_addr;
5324 lab_false = create_artificial_label ();
5325 lab_over = create_artificial_label ();
5327 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5329 need_temp = (!REG_P (container)
5330 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5331 || TYPE_ALIGN (type) > 128));
5333 /* In case we are passing structure, verify that it is consecutive block
5334 on the register save area. If not we need to do moves. */
5335 if (!need_temp && !REG_P (container))
5337 /* Verify that all registers are strictly consecutive */
5338 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5342 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5344 rtx slot = XVECEXP (container, 0, i);
5345 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5346 || INTVAL (XEXP (slot, 1)) != i * 16)
5354 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5356 rtx slot = XVECEXP (container, 0, i);
5357 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5358 || INTVAL (XEXP (slot, 1)) != i * 8)
5370 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5371 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5372 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5373 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5376 /* First ensure that we fit completely in registers. */
5379 t = build_int_cst (TREE_TYPE (gpr),
5380 (REGPARM_MAX - needed_intregs + 1) * 8);
5381 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5382 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5383 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5384 gimplify_and_add (t, pre_p);
5388 t = build_int_cst (TREE_TYPE (fpr),
5389 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5391 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5392 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5393 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5394 gimplify_and_add (t, pre_p);
5397 /* Compute index to start of area used for integer regs. */
5400 /* int_addr = gpr + sav; */
5401 t = fold_convert (sizetype, gpr);
5402 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5403 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5404 gimplify_and_add (t, pre_p);
5408 /* sse_addr = fpr + sav; */
5409 t = fold_convert (sizetype, fpr);
5410 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5411 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5412 gimplify_and_add (t, pre_p);
5417 tree temp = create_tmp_var (type, "va_arg_tmp");
5420 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5421 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5422 gimplify_and_add (t, pre_p);
5424 for (i = 0; i < XVECLEN (container, 0); i++)
5426 rtx slot = XVECEXP (container, 0, i);
5427 rtx reg = XEXP (slot, 0);
5428 enum machine_mode mode = GET_MODE (reg);
5429 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5430 tree addr_type = build_pointer_type (piece_type);
5433 tree dest_addr, dest;
5435 if (SSE_REGNO_P (REGNO (reg)))
5437 src_addr = sse_addr;
5438 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5442 src_addr = int_addr;
5443 src_offset = REGNO (reg) * 8;
5445 src_addr = fold_convert (addr_type, src_addr);
5446 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5447 size_int (src_offset));
5448 src = build_va_arg_indirect_ref (src_addr);
5450 dest_addr = fold_convert (addr_type, addr);
5451 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5452 size_int (INTVAL (XEXP (slot, 1))));
5453 dest = build_va_arg_indirect_ref (dest_addr);
5455 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5456 gimplify_and_add (t, pre_p);
5462 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5463 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5464 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5465 gimplify_and_add (t, pre_p);
5469 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5470 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5471 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5472 gimplify_and_add (t, pre_p);
5475 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5476 gimplify_and_add (t, pre_p);
5478 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5479 append_to_statement_list (t, pre_p);
5482 /* ... otherwise out of the overflow area. */
5484 /* Care for on-stack alignment if needed. */
5485 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5486 || integer_zerop (TYPE_SIZE (type)))
5490 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5491 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5492 size_int (align - 1));
5493 t = fold_convert (sizetype, t);
5494 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5496 t = fold_convert (TREE_TYPE (ovf), t);
5498 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5500 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5501 gimplify_and_add (t2, pre_p);
5503 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5504 size_int (rsize * UNITS_PER_WORD));
5505 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5506 gimplify_and_add (t, pre_p);
5510 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5511 append_to_statement_list (t, pre_p);
5514 ptrtype = build_pointer_type (type);
5515 addr = fold_convert (ptrtype, addr);
5518 addr = build_va_arg_indirect_ref (addr);
5519 return build_va_arg_indirect_ref (addr);
5522 /* Return nonzero if OPNUM's MEM should be matched
5523 in movabs* patterns. */
5526 ix86_check_movabs (rtx insn, int opnum)
5530 set = PATTERN (insn);
5531 if (GET_CODE (set) == PARALLEL)
5532 set = XVECEXP (set, 0, 0);
5533 gcc_assert (GET_CODE (set) == SET);
5534 mem = XEXP (set, opnum);
5535 while (GET_CODE (mem) == SUBREG)
5536 mem = SUBREG_REG (mem);
5537 gcc_assert (MEM_P (mem));
5538 return (volatile_ok || !MEM_VOLATILE_P (mem));
5541 /* Initialize the table of extra 80387 mathematical constants. */
5544 init_ext_80387_constants (void)
5546 static const char * cst[5] =
5548 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5549 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5550 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5551 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5552 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5556 for (i = 0; i < 5; i++)
5558 real_from_string (&ext_80387_constants_table[i], cst[i]);
5559 /* Ensure each constant is rounded to XFmode precision. */
5560 real_convert (&ext_80387_constants_table[i],
5561 XFmode, &ext_80387_constants_table[i]);
5564 ext_80387_constants_init = 1;
5567 /* Return true if the constant is something that can be loaded with
5568 a special instruction. */
5571 standard_80387_constant_p (rtx x)
5573 enum machine_mode mode = GET_MODE (x);
5577 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5580 if (x == CONST0_RTX (mode))
5582 if (x == CONST1_RTX (mode))
5585 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5587 /* For XFmode constants, try to find a special 80387 instruction when
5588 optimizing for size or on those CPUs that benefit from them. */
5590 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5594 if (! ext_80387_constants_init)
5595 init_ext_80387_constants ();
5597 for (i = 0; i < 5; i++)
5598 if (real_identical (&r, &ext_80387_constants_table[i]))
5602 /* Load of the constant -0.0 or -1.0 will be split as
5603 fldz;fchs or fld1;fchs sequence. */
5604 if (real_isnegzero (&r))
5606 if (real_identical (&r, &dconstm1))
5612 /* Return the opcode of the special instruction to be used to load
5616 standard_80387_constant_opcode (rtx x)
5618 switch (standard_80387_constant_p (x))
5642 /* Return the CONST_DOUBLE representing the 80387 constant that is
5643 loaded by the specified special instruction. The argument IDX
5644 matches the return value from standard_80387_constant_p. */
5647 standard_80387_constant_rtx (int idx)
5651 if (! ext_80387_constants_init)
5652 init_ext_80387_constants ();
5668 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5672 /* Return 1 if mode is a valid mode for sse. */
5674 standard_sse_mode_p (enum machine_mode mode)
5691 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5694 standard_sse_constant_p (rtx x)
5696 enum machine_mode mode = GET_MODE (x);
5698 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5700 if (vector_all_ones_operand (x, mode)
5701 && standard_sse_mode_p (mode))
5702 return TARGET_SSE2 ? 2 : -1;
5707 /* Return the opcode of the special instruction to be used to load
5711 standard_sse_constant_opcode (rtx insn, rtx x)
5713 switch (standard_sse_constant_p (x))
5716 if (get_attr_mode (insn) == MODE_V4SF)
5717 return "xorps\t%0, %0";
5718 else if (get_attr_mode (insn) == MODE_V2DF)
5719 return "xorpd\t%0, %0";
5721 return "pxor\t%0, %0";
5723 return "pcmpeqd\t%0, %0";
5728 /* Returns 1 if OP contains a symbol reference */
5731 symbolic_reference_mentioned_p (rtx op)
5736 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5739 fmt = GET_RTX_FORMAT (GET_CODE (op));
5740 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5746 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5747 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5751 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5758 /* Return 1 if it is appropriate to emit `ret' instructions in the
5759 body of a function. Do this only if the epilogue is simple, needing a
5760 couple of insns. Prior to reloading, we can't tell how many registers
5761 must be saved, so return 0 then. Return 0 if there is no frame
5762 marker to de-allocate. */
5765 ix86_can_use_return_insn_p (void)
5767 struct ix86_frame frame;
5769 if (! reload_completed || frame_pointer_needed)
5772 /* Don't allow more than 32 pop, since that's all we can do
5773 with one instruction. */
5774 if (crtl->args.pops_args
5775 && crtl->args.size >= 32768)
5778 ix86_compute_frame_layout (&frame);
5779 return frame.to_allocate == 0 && frame.nregs == 0;
5782 /* Value should be nonzero if functions must have frame pointers.
5783 Zero means the frame pointer need not be set up (and parms may
5784 be accessed via the stack pointer) in functions that seem suitable. */
5787 ix86_frame_pointer_required (void)
5789 /* If we accessed previous frames, then the generated code expects
5790 to be able to access the saved ebp value in our frame. */
5791 if (cfun->machine->accesses_prev_frame)
5794 /* Several x86 os'es need a frame pointer for other reasons,
5795 usually pertaining to setjmp. */
5796 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5799 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5800 the frame pointer by default. Turn it back on now if we've not
5801 got a leaf function. */
5802 if (TARGET_OMIT_LEAF_FRAME_POINTER
5803 && (!current_function_is_leaf
5804 || ix86_current_function_calls_tls_descriptor))
5813 /* Record that the current function accesses previous call frames. */
5816 ix86_setup_frame_addresses (void)
5818 cfun->machine->accesses_prev_frame = 1;
5821 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5822 # define USE_HIDDEN_LINKONCE 1
5824 # define USE_HIDDEN_LINKONCE 0
5827 static int pic_labels_used;
5829 /* Fills in the label name that should be used for a pc thunk for
5830 the given register. */
5833 get_pc_thunk_name (char name[32], unsigned int regno)
5835 gcc_assert (!TARGET_64BIT);
5837 if (USE_HIDDEN_LINKONCE)
5838 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5840 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5844 /* This function generates code for -fpic that loads %ebx with
5845 the return address of the caller and then returns. */
5848 ix86_file_end (void)
5853 for (regno = 0; regno < 8; ++regno)
5857 if (! ((pic_labels_used >> regno) & 1))
5860 get_pc_thunk_name (name, regno);
5865 switch_to_section (darwin_sections[text_coal_section]);
5866 fputs ("\t.weak_definition\t", asm_out_file);
5867 assemble_name (asm_out_file, name);
5868 fputs ("\n\t.private_extern\t", asm_out_file);
5869 assemble_name (asm_out_file, name);
5870 fputs ("\n", asm_out_file);
5871 ASM_OUTPUT_LABEL (asm_out_file, name);
5875 if (USE_HIDDEN_LINKONCE)
5879 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5881 TREE_PUBLIC (decl) = 1;
5882 TREE_STATIC (decl) = 1;
5883 DECL_ONE_ONLY (decl) = 1;
5885 (*targetm.asm_out.unique_section) (decl, 0);
5886 switch_to_section (get_named_section (decl, NULL, 0));
5888 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5889 fputs ("\t.hidden\t", asm_out_file);
5890 assemble_name (asm_out_file, name);
5891 fputc ('\n', asm_out_file);
5892 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5896 switch_to_section (text_section);
5897 ASM_OUTPUT_LABEL (asm_out_file, name);
5900 xops[0] = gen_rtx_REG (Pmode, regno);
5901 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5903 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5905 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5906 output_asm_insn ("ret", xops);
5909 if (NEED_INDICATE_EXEC_STACK)
5910 file_end_indicate_exec_stack ();
5913 /* Emit code for the SET_GOT patterns. */
5916 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5922 if (TARGET_VXWORKS_RTP && flag_pic)
5924 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5925 xops[2] = gen_rtx_MEM (Pmode,
5926 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5927 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5929 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5930 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5931 an unadorned address. */
5932 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5933 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5934 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5938 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5940 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5942 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5947 output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
5949 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5952 output_asm_insn ("call\t%a2", xops);
5955 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5956 is what will be referenced by the Mach-O PIC subsystem. */
5958 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5961 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5962 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5967 output_asm_insn ("pop{q}\t%0", xops);
5969 output_asm_insn ("pop{l}\t%0", xops);
5975 get_pc_thunk_name (name, REGNO (dest));
5976 pic_labels_used |= 1 << REGNO (dest);
5978 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5979 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5980 output_asm_insn ("call\t%X2", xops);
5981 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5982 is what will be referenced by the Mach-O PIC subsystem. */
5985 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5987 targetm.asm_out.internal_label (asm_out_file, "L",
5988 CODE_LABEL_NUMBER (label));
5995 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5998 output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
6000 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
6005 output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6007 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6013 /* Generate an "push" pattern for input ARG. */
6018 return gen_rtx_SET (VOIDmode,
6020 gen_rtx_PRE_DEC (Pmode,
6021 stack_pointer_rtx)),
6025 /* Return >= 0 if there is an unused call-clobbered register available
6026 for the entire function. */
6029 ix86_select_alt_pic_regnum (void)
6031 if (current_function_is_leaf && !crtl->profile
6032 && !ix86_current_function_calls_tls_descriptor)
6035 for (i = 2; i >= 0; --i)
6036 if (!df_regs_ever_live_p (i))
6040 return INVALID_REGNUM;
6043 /* Return 1 if we need to save REGNO. */
6045 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6047 if (pic_offset_table_rtx
6048 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6049 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6051 || crtl->calls_eh_return
6052 || crtl->uses_const_pool))
6054 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6059 if (crtl->calls_eh_return && maybe_eh_return)
6064 unsigned test = EH_RETURN_DATA_REGNO (i);
6065 if (test == INVALID_REGNUM)
6072 if (cfun->machine->force_align_arg_pointer
6073 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6076 return (df_regs_ever_live_p (regno)
6077 && !call_used_regs[regno]
6078 && !fixed_regs[regno]
6079 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6082 /* Return number of registers to be saved on the stack. */
6085 ix86_nsaved_regs (void)
6090 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6091 if (ix86_save_reg (regno, true))
6096 /* Return the offset between two registers, one to be eliminated, and the other
6097 its replacement, at the start of a routine. */
6100 ix86_initial_elimination_offset (int from, int to)
6102 struct ix86_frame frame;
6103 ix86_compute_frame_layout (&frame);
6105 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6106 return frame.hard_frame_pointer_offset;
6107 else if (from == FRAME_POINTER_REGNUM
6108 && to == HARD_FRAME_POINTER_REGNUM)
6109 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6112 gcc_assert (to == STACK_POINTER_REGNUM);
6114 if (from == ARG_POINTER_REGNUM)
6115 return frame.stack_pointer_offset;
6117 gcc_assert (from == FRAME_POINTER_REGNUM);
6118 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6122 /* Fill structure ix86_frame about frame of currently computed function. */
6125 ix86_compute_frame_layout (struct ix86_frame *frame)
6127 HOST_WIDE_INT total_size;
6128 unsigned int stack_alignment_needed;
6129 HOST_WIDE_INT offset;
6130 unsigned int preferred_alignment;
6131 HOST_WIDE_INT size = get_frame_size ();
6133 frame->nregs = ix86_nsaved_regs ();
6136 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6137 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6139 /* During reload iteration the amount of registers saved can change.
6140 Recompute the value as needed. Do not recompute when amount of registers
6141 didn't change as reload does multiple calls to the function and does not
6142 expect the decision to change within single iteration. */
6144 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6146 int count = frame->nregs;
6148 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6149 /* The fast prologue uses move instead of push to save registers. This
6150 is significantly longer, but also executes faster as modern hardware
6151 can execute the moves in parallel, but can't do that for push/pop.
6153 Be careful about choosing what prologue to emit: When function takes
6154 many instructions to execute we may use slow version as well as in
6155 case function is known to be outside hot spot (this is known with
6156 feedback only). Weight the size of function by number of registers
6157 to save as it is cheap to use one or two push instructions but very
6158 slow to use many of them. */
6160 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6161 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6162 || (flag_branch_probabilities
6163 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6164 cfun->machine->use_fast_prologue_epilogue = false;
6166 cfun->machine->use_fast_prologue_epilogue
6167 = !expensive_function_p (count);
6169 if (TARGET_PROLOGUE_USING_MOVE
6170 && cfun->machine->use_fast_prologue_epilogue)
6171 frame->save_regs_using_mov = true;
6173 frame->save_regs_using_mov = false;
6176 /* Skip return address and saved base pointer. */
6177 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6179 frame->hard_frame_pointer_offset = offset;
6181 /* Do some sanity checking of stack_alignment_needed and
6182 preferred_alignment, since i386 port is the only using those features
6183 that may break easily. */
6185 gcc_assert (!size || stack_alignment_needed);
6186 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6187 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6188 gcc_assert (stack_alignment_needed
6189 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6191 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6192 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6194 /* Register save area */
6195 offset += frame->nregs * UNITS_PER_WORD;
6198 if (ix86_save_varrargs_registers)
6200 offset += X86_64_VARARGS_SIZE;
6201 frame->va_arg_size = X86_64_VARARGS_SIZE;
6204 frame->va_arg_size = 0;
6206 /* Align start of frame for local function. */
6207 frame->padding1 = ((offset + stack_alignment_needed - 1)
6208 & -stack_alignment_needed) - offset;
6210 offset += frame->padding1;
6212 /* Frame pointer points here. */
6213 frame->frame_pointer_offset = offset;
6217 /* Add outgoing arguments area. Can be skipped if we eliminated
6218 all the function calls as dead code.
6219 Skipping is however impossible when function calls alloca. Alloca
6220 expander assumes that last crtl->outgoing_args_size
6221 of stack frame are unused. */
6222 if (ACCUMULATE_OUTGOING_ARGS
6223 && (!current_function_is_leaf || cfun->calls_alloca
6224 || ix86_current_function_calls_tls_descriptor))
6226 offset += crtl->outgoing_args_size;
6227 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6230 frame->outgoing_arguments_size = 0;
6232 /* Align stack boundary. Only needed if we're calling another function
6234 if (!current_function_is_leaf || cfun->calls_alloca
6235 || ix86_current_function_calls_tls_descriptor)
6236 frame->padding2 = ((offset + preferred_alignment - 1)
6237 & -preferred_alignment) - offset;
6239 frame->padding2 = 0;
6241 offset += frame->padding2;
6243 /* We've reached end of stack frame. */
6244 frame->stack_pointer_offset = offset;
6246 /* Size prologue needs to allocate. */
6247 frame->to_allocate =
6248 (size + frame->padding1 + frame->padding2
6249 + frame->outgoing_arguments_size + frame->va_arg_size);
6251 if ((!frame->to_allocate && frame->nregs <= 1)
6252 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6253 frame->save_regs_using_mov = false;
6255 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6256 && current_function_is_leaf
6257 && !ix86_current_function_calls_tls_descriptor)
6259 frame->red_zone_size = frame->to_allocate;
6260 if (frame->save_regs_using_mov)
6261 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6262 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6263 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6266 frame->red_zone_size = 0;
6267 frame->to_allocate -= frame->red_zone_size;
6268 frame->stack_pointer_offset -= frame->red_zone_size;
6270 fprintf (stderr, "\n");
6271 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6272 fprintf (stderr, "size: %ld\n", (long)size);
6273 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6274 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6275 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6276 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6277 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6278 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6279 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6280 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6281 (long)frame->hard_frame_pointer_offset);
6282 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6283 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6284 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6285 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6289 /* Emit code to save registers in the prologue. */
6292 ix86_emit_save_regs (void)
6297 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6298 if (ix86_save_reg (regno, true))
6300 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6301 RTX_FRAME_RELATED_P (insn) = 1;
6305 /* Emit code to save registers using MOV insns. First register
6306 is restored from POINTER + OFFSET. */
6308 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6313 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6314 if (ix86_save_reg (regno, true))
6316 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6318 gen_rtx_REG (Pmode, regno));
6319 RTX_FRAME_RELATED_P (insn) = 1;
6320 offset += UNITS_PER_WORD;
6324 /* Expand prologue or epilogue stack adjustment.
6325 The pattern exist to put a dependency on all ebp-based memory accesses.
6326 STYLE should be negative if instructions should be marked as frame related,
6327 zero if %r11 register is live and cannot be freely used and positive
6331 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6336 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6337 else if (x86_64_immediate_operand (offset, DImode))
6338 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6342 /* r11 is used by indirect sibcall return as well, set before the
6343 epilogue and used after the epilogue. ATM indirect sibcall
6344 shouldn't be used together with huge frame sizes in one
6345 function because of the frame_size check in sibcall.c. */
6347 r11 = gen_rtx_REG (DImode, R11_REG);
6348 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6350 RTX_FRAME_RELATED_P (insn) = 1;
6351 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6355 RTX_FRAME_RELATED_P (insn) = 1;
6358 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6361 ix86_internal_arg_pointer (void)
6363 bool has_force_align_arg_pointer =
6364 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6365 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6366 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6367 && DECL_NAME (current_function_decl)
6368 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6369 && DECL_FILE_SCOPE_P (current_function_decl))
6370 || ix86_force_align_arg_pointer
6371 || has_force_align_arg_pointer)
6373 /* Nested functions can't realign the stack due to a register
6375 if (DECL_CONTEXT (current_function_decl)
6376 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6378 if (ix86_force_align_arg_pointer)
6379 warning (0, "-mstackrealign ignored for nested functions");
6380 if (has_force_align_arg_pointer)
6381 error ("%s not supported for nested functions",
6382 ix86_force_align_arg_pointer_string);
6383 return virtual_incoming_args_rtx;
6385 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6386 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6389 return virtual_incoming_args_rtx;
6392 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6393 This is called from dwarf2out.c to emit call frame instructions
6394 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6396 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6398 rtx unspec = SET_SRC (pattern);
6399 gcc_assert (GET_CODE (unspec) == UNSPEC);
6403 case UNSPEC_REG_SAVE:
6404 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6405 SET_DEST (pattern));
6407 case UNSPEC_DEF_CFA:
6408 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6409 INTVAL (XVECEXP (unspec, 0, 0)));
6416 /* Expand the prologue into a bunch of separate insns. */
6419 ix86_expand_prologue (void)
6423 struct ix86_frame frame;
6424 HOST_WIDE_INT allocate;
6426 ix86_compute_frame_layout (&frame);
6428 if (cfun->machine->force_align_arg_pointer)
6432 /* Grab the argument pointer. */
6433 x = plus_constant (stack_pointer_rtx, 4);
6434 y = cfun->machine->force_align_arg_pointer;
6435 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6436 RTX_FRAME_RELATED_P (insn) = 1;
6438 /* The unwind info consists of two parts: install the fafp as the cfa,
6439 and record the fafp as the "save register" of the stack pointer.
6440 The later is there in order that the unwinder can see where it
6441 should restore the stack pointer across the and insn. */
6442 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6443 x = gen_rtx_SET (VOIDmode, y, x);
6444 RTX_FRAME_RELATED_P (x) = 1;
6445 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6447 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6448 RTX_FRAME_RELATED_P (y) = 1;
6449 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6450 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6451 REG_NOTES (insn) = x;
6453 /* Align the stack. */
6454 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6457 /* And here we cheat like madmen with the unwind info. We force the
6458 cfa register back to sp+4, which is exactly what it was at the
6459 start of the function. Re-pushing the return address results in
6460 the return at the same spot relative to the cfa, and thus is
6461 correct wrt the unwind info. */
6462 x = cfun->machine->force_align_arg_pointer;
6463 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6464 insn = emit_insn (gen_push (x));
6465 RTX_FRAME_RELATED_P (insn) = 1;
6468 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6469 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6470 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6471 REG_NOTES (insn) = x;
6474 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6475 slower on all targets. Also sdb doesn't like it. */
6477 if (frame_pointer_needed)
6479 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6480 RTX_FRAME_RELATED_P (insn) = 1;
6482 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6483 RTX_FRAME_RELATED_P (insn) = 1;
6486 allocate = frame.to_allocate;
6488 if (!frame.save_regs_using_mov)
6489 ix86_emit_save_regs ();
6491 allocate += frame.nregs * UNITS_PER_WORD;
6493 /* When using red zone we may start register saving before allocating
6494 the stack frame saving one cycle of the prologue. However I will
6495 avoid doing this if I am going to have to probe the stack since
6496 at least on x86_64 the stack probe can turn into a call that clobbers
6497 a red zone location */
6498 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6499 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6500 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6501 : stack_pointer_rtx,
6502 -frame.nregs * UNITS_PER_WORD);
6506 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6507 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6508 GEN_INT (-allocate), -1);
6511 /* Only valid for Win32. */
6512 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6516 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6518 if (TARGET_64BIT_MS_ABI)
6521 eax_live = ix86_eax_live_at_start_p ();
6525 emit_insn (gen_push (eax));
6526 allocate -= UNITS_PER_WORD;
6529 emit_move_insn (eax, GEN_INT (allocate));
6532 insn = gen_allocate_stack_worker_64 (eax);
6534 insn = gen_allocate_stack_worker_32 (eax);
6535 insn = emit_insn (insn);
6536 RTX_FRAME_RELATED_P (insn) = 1;
6537 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6538 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6539 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6540 t, REG_NOTES (insn));
6544 if (frame_pointer_needed)
6545 t = plus_constant (hard_frame_pointer_rtx,
6548 - frame.nregs * UNITS_PER_WORD);
6550 t = plus_constant (stack_pointer_rtx, allocate);
6551 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6555 if (frame.save_regs_using_mov
6556 && !(TARGET_RED_ZONE
6557 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6559 if (!frame_pointer_needed || !frame.to_allocate)
6560 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6562 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6563 -frame.nregs * UNITS_PER_WORD);
6566 pic_reg_used = false;
6567 if (pic_offset_table_rtx
6568 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6571 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6573 if (alt_pic_reg_used != INVALID_REGNUM)
6574 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6576 pic_reg_used = true;
6583 if (ix86_cmodel == CM_LARGE_PIC)
6585 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6586 rtx label = gen_label_rtx ();
6588 LABEL_PRESERVE_P (label) = 1;
6589 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6590 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6591 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6592 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6593 pic_offset_table_rtx, tmp_reg));
6596 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6599 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6602 /* Prevent function calls from being scheduled before the call to mcount.
6603 In the pic_reg_used case, make sure that the got load isn't deleted. */
6607 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6608 emit_insn (gen_blockage ());
6612 /* Emit code to restore saved registers using MOV insns. First register
6613 is restored from POINTER + OFFSET. */
6615 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6616 int maybe_eh_return)
6619 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6621 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6622 if (ix86_save_reg (regno, maybe_eh_return))
6624 /* Ensure that adjust_address won't be forced to produce pointer
6625 out of range allowed by x86-64 instruction set. */
6626 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6630 r11 = gen_rtx_REG (DImode, R11_REG);
6631 emit_move_insn (r11, GEN_INT (offset));
6632 emit_insn (gen_adddi3 (r11, r11, pointer));
6633 base_address = gen_rtx_MEM (Pmode, r11);
6636 emit_move_insn (gen_rtx_REG (Pmode, regno),
6637 adjust_address (base_address, Pmode, offset));
6638 offset += UNITS_PER_WORD;
6642 /* Restore function stack, frame, and registers. */
6645 ix86_expand_epilogue (int style)
6648 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6649 struct ix86_frame frame;
6650 HOST_WIDE_INT offset;
6652 ix86_compute_frame_layout (&frame);
6654 /* Calculate start of saved registers relative to ebp. Special care
6655 must be taken for the normal return case of a function using
6656 eh_return: the eax and edx registers are marked as saved, but not
6657 restored along this path. */
6658 offset = frame.nregs;
6659 if (crtl->calls_eh_return && style != 2)
6661 offset *= -UNITS_PER_WORD;
6663 /* If we're only restoring one register and sp is not valid then
6664 using a move instruction to restore the register since it's
6665 less work than reloading sp and popping the register.
6667 The default code result in stack adjustment using add/lea instruction,
6668 while this code results in LEAVE instruction (or discrete equivalent),
6669 so it is profitable in some other cases as well. Especially when there
6670 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6671 and there is exactly one register to pop. This heuristic may need some
6672 tuning in future. */
6673 if ((!sp_valid && frame.nregs <= 1)
6674 || (TARGET_EPILOGUE_USING_MOVE
6675 && cfun->machine->use_fast_prologue_epilogue
6676 && (frame.nregs > 1 || frame.to_allocate))
6677 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6678 || (frame_pointer_needed && TARGET_USE_LEAVE
6679 && cfun->machine->use_fast_prologue_epilogue
6680 && frame.nregs == 1)
6681 || crtl->calls_eh_return)
6683 /* Restore registers. We can use ebp or esp to address the memory
6684 locations. If both are available, default to ebp, since offsets
6685 are known to be small. Only exception is esp pointing directly to the
6686 end of block of saved registers, where we may simplify addressing
6689 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6690 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6691 frame.to_allocate, style == 2);
6693 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6694 offset, style == 2);
6696 /* eh_return epilogues need %ecx added to the stack pointer. */
6699 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6701 if (frame_pointer_needed)
6703 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6704 tmp = plus_constant (tmp, UNITS_PER_WORD);
6705 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6707 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6708 emit_move_insn (hard_frame_pointer_rtx, tmp);
6710 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6715 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6716 tmp = plus_constant (tmp, (frame.to_allocate
6717 + frame.nregs * UNITS_PER_WORD));
6718 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6721 else if (!frame_pointer_needed)
6722 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6723 GEN_INT (frame.to_allocate
6724 + frame.nregs * UNITS_PER_WORD),
6726 /* If not an i386, mov & pop is faster than "leave". */
6727 else if (TARGET_USE_LEAVE || optimize_size
6728 || !cfun->machine->use_fast_prologue_epilogue)
6729 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6732 pro_epilogue_adjust_stack (stack_pointer_rtx,
6733 hard_frame_pointer_rtx,
6736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6743 /* First step is to deallocate the stack frame so that we can
6744 pop the registers. */
6747 gcc_assert (frame_pointer_needed);
6748 pro_epilogue_adjust_stack (stack_pointer_rtx,
6749 hard_frame_pointer_rtx,
6750 GEN_INT (offset), style);
6752 else if (frame.to_allocate)
6753 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6754 GEN_INT (frame.to_allocate), style);
6756 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6757 if (ix86_save_reg (regno, false))
6760 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6762 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6764 if (frame_pointer_needed)
6766 /* Leave results in shorter dependency chains on CPUs that are
6767 able to grok it fast. */
6768 if (TARGET_USE_LEAVE)
6769 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6770 else if (TARGET_64BIT)
6771 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6773 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6777 if (cfun->machine->force_align_arg_pointer)
6779 emit_insn (gen_addsi3 (stack_pointer_rtx,
6780 cfun->machine->force_align_arg_pointer,
6784 /* Sibcall epilogues don't want a return instruction. */
6788 if (crtl->args.pops_args && crtl->args.size)
6790 rtx popc = GEN_INT (crtl->args.pops_args);
6792 /* i386 can only pop 64K bytes. If asked to pop more, pop
6793 return address, do explicit add, and jump indirectly to the
6796 if (crtl->args.pops_args >= 65536)
6798 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6800 /* There is no "pascal" calling convention in any 64bit ABI. */
6801 gcc_assert (!TARGET_64BIT);
6803 emit_insn (gen_popsi1 (ecx));
6804 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6805 emit_jump_insn (gen_return_indirect_internal (ecx));
6808 emit_jump_insn (gen_return_pop_internal (popc));
6811 emit_jump_insn (gen_return_internal ());
6814 /* Reset from the function's potential modifications. */
6817 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6818 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6820 if (pic_offset_table_rtx)
6821 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6823 /* Mach-O doesn't support labels at the end of objects, so if
6824 it looks like we might want one, insert a NOP. */
6826 rtx insn = get_last_insn ();
6829 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6830 insn = PREV_INSN (insn);
6834 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6835 fputs ("\tnop\n", file);
6841 /* Extract the parts of an RTL expression that is a valid memory address
6842 for an instruction. Return 0 if the structure of the address is
6843 grossly off. Return -1 if the address contains ASHIFT, so it is not
6844 strictly valid, but still used for computing length of lea instruction. */
6847 ix86_decompose_address (rtx addr, struct ix86_address *out)
6849 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6850 rtx base_reg, index_reg;
6851 HOST_WIDE_INT scale = 1;
6852 rtx scale_rtx = NULL_RTX;
6854 enum ix86_address_seg seg = SEG_DEFAULT;
6856 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6858 else if (GET_CODE (addr) == PLUS)
6868 addends[n++] = XEXP (op, 1);
6871 while (GET_CODE (op) == PLUS);
6876 for (i = n; i >= 0; --i)
6879 switch (GET_CODE (op))
6884 index = XEXP (op, 0);
6885 scale_rtx = XEXP (op, 1);
6889 if (XINT (op, 1) == UNSPEC_TP
6890 && TARGET_TLS_DIRECT_SEG_REFS
6891 && seg == SEG_DEFAULT)
6892 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6921 else if (GET_CODE (addr) == MULT)
6923 index = XEXP (addr, 0); /* index*scale */
6924 scale_rtx = XEXP (addr, 1);
6926 else if (GET_CODE (addr) == ASHIFT)
6930 /* We're called for lea too, which implements ashift on occasion. */
6931 index = XEXP (addr, 0);
6932 tmp = XEXP (addr, 1);
6933 if (!CONST_INT_P (tmp))
6935 scale = INTVAL (tmp);
6936 if ((unsigned HOST_WIDE_INT) scale > 3)
6942 disp = addr; /* displacement */
6944 /* Extract the integral value of scale. */
6947 if (!CONST_INT_P (scale_rtx))
6949 scale = INTVAL (scale_rtx);
6952 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6953 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6955 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6956 if (base_reg && index_reg && scale == 1
6957 && (index_reg == arg_pointer_rtx
6958 || index_reg == frame_pointer_rtx
6959 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6962 tmp = base, base = index, index = tmp;
6963 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6966 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6967 if ((base_reg == hard_frame_pointer_rtx
6968 || base_reg == frame_pointer_rtx
6969 || base_reg == arg_pointer_rtx) && !disp)
6972 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6973 Avoid this by transforming to [%esi+0]. */
6974 if (TARGET_K6 && !optimize_size
6975 && base_reg && !index_reg && !disp
6977 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6980 /* Special case: encode reg+reg instead of reg*2. */
6981 if (!base && index && scale && scale == 2)
6982 base = index, base_reg = index_reg, scale = 1;
6984 /* Special case: scaling cannot be encoded without base or displacement. */
6985 if (!base && !disp && index && scale != 1)
6997 /* Return cost of the memory address x.
6998 For i386, it is better to use a complex address than let gcc copy
6999 the address into a reg and make a new pseudo. But not if the address
7000 requires to two regs - that would mean more pseudos with longer
7003 ix86_address_cost (rtx x)
7005 struct ix86_address parts;
7007 int ok = ix86_decompose_address (x, &parts);
7011 if (parts.base && GET_CODE (parts.base) == SUBREG)
7012 parts.base = SUBREG_REG (parts.base);
7013 if (parts.index && GET_CODE (parts.index) == SUBREG)
7014 parts.index = SUBREG_REG (parts.index);
7016 /* Attempt to minimize number of registers in the address. */
7018 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7020 && (!REG_P (parts.index)
7021 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7025 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7027 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7028 && parts.base != parts.index)
7031 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7032 since it's predecode logic can't detect the length of instructions
7033 and it degenerates to vector decoded. Increase cost of such
7034 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7035 to split such addresses or even refuse such addresses at all.
7037 Following addressing modes are affected:
7042 The first and last case may be avoidable by explicitly coding the zero in
7043 memory address, but I don't have AMD-K6 machine handy to check this
7047 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7048 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7049 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7055 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7056 this is used for to form addresses to local data when -fPIC is in
7060 darwin_local_data_pic (rtx disp)
7062 if (GET_CODE (disp) == MINUS)
7064 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7065 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7066 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7068 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7069 if (! strcmp (sym_name, "<pic base>"))
7077 /* Determine if a given RTX is a valid constant. We already know this
7078 satisfies CONSTANT_P. */
7081 legitimate_constant_p (rtx x)
7083 switch (GET_CODE (x))
7088 if (GET_CODE (x) == PLUS)
7090 if (!CONST_INT_P (XEXP (x, 1)))
7095 if (TARGET_MACHO && darwin_local_data_pic (x))
7098 /* Only some unspecs are valid as "constants". */
7099 if (GET_CODE (x) == UNSPEC)
7100 switch (XINT (x, 1))
7105 return TARGET_64BIT;
7108 x = XVECEXP (x, 0, 0);
7109 return (GET_CODE (x) == SYMBOL_REF
7110 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7112 x = XVECEXP (x, 0, 0);
7113 return (GET_CODE (x) == SYMBOL_REF
7114 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7119 /* We must have drilled down to a symbol. */
7120 if (GET_CODE (x) == LABEL_REF)
7122 if (GET_CODE (x) != SYMBOL_REF)
7127 /* TLS symbols are never valid. */
7128 if (SYMBOL_REF_TLS_MODEL (x))
7131 /* DLLIMPORT symbols are never valid. */
7132 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7133 && SYMBOL_REF_DLLIMPORT_P (x))
7138 if (GET_MODE (x) == TImode
7139 && x != CONST0_RTX (TImode)
7145 if (x == CONST0_RTX (GET_MODE (x)))
7153 /* Otherwise we handle everything else in the move patterns. */
7157 /* Determine if it's legal to put X into the constant pool. This
7158 is not possible for the address of thread-local symbols, which
7159 is checked above. */
7162 ix86_cannot_force_const_mem (rtx x)
7164 /* We can always put integral constants and vectors in memory. */
7165 switch (GET_CODE (x))
7175 return !legitimate_constant_p (x);
7178 /* Determine if a given RTX is a valid constant address. */
7181 constant_address_p (rtx x)
7183 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7186 /* Nonzero if the constant value X is a legitimate general operand
7187 when generating PIC code. It is given that flag_pic is on and
7188 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7191 legitimate_pic_operand_p (rtx x)
7195 switch (GET_CODE (x))
7198 inner = XEXP (x, 0);
7199 if (GET_CODE (inner) == PLUS
7200 && CONST_INT_P (XEXP (inner, 1)))
7201 inner = XEXP (inner, 0);
7203 /* Only some unspecs are valid as "constants". */
7204 if (GET_CODE (inner) == UNSPEC)
7205 switch (XINT (inner, 1))
7210 return TARGET_64BIT;
7212 x = XVECEXP (inner, 0, 0);
7213 return (GET_CODE (x) == SYMBOL_REF
7214 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7222 return legitimate_pic_address_disp_p (x);
7229 /* Determine if a given CONST RTX is a valid memory displacement
7233 legitimate_pic_address_disp_p (rtx disp)
7237 /* In 64bit mode we can allow direct addresses of symbols and labels
7238 when they are not dynamic symbols. */
7241 rtx op0 = disp, op1;
7243 switch (GET_CODE (disp))
7249 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7251 op0 = XEXP (XEXP (disp, 0), 0);
7252 op1 = XEXP (XEXP (disp, 0), 1);
7253 if (!CONST_INT_P (op1)
7254 || INTVAL (op1) >= 16*1024*1024
7255 || INTVAL (op1) < -16*1024*1024)
7257 if (GET_CODE (op0) == LABEL_REF)
7259 if (GET_CODE (op0) != SYMBOL_REF)
7264 /* TLS references should always be enclosed in UNSPEC. */
7265 if (SYMBOL_REF_TLS_MODEL (op0))
7267 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7268 && ix86_cmodel != CM_LARGE_PIC)
7276 if (GET_CODE (disp) != CONST)
7278 disp = XEXP (disp, 0);
7282 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7283 of GOT tables. We should not need these anyway. */
7284 if (GET_CODE (disp) != UNSPEC
7285 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7286 && XINT (disp, 1) != UNSPEC_GOTOFF
7287 && XINT (disp, 1) != UNSPEC_PLTOFF))
7290 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7291 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7297 if (GET_CODE (disp) == PLUS)
7299 if (!CONST_INT_P (XEXP (disp, 1)))
7301 disp = XEXP (disp, 0);
7305 if (TARGET_MACHO && darwin_local_data_pic (disp))
7308 if (GET_CODE (disp) != UNSPEC)
7311 switch (XINT (disp, 1))
7316 /* We need to check for both symbols and labels because VxWorks loads
7317 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7319 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7320 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7322 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7323 While ABI specify also 32bit relocation but we don't produce it in
7324 small PIC model at all. */
7325 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7326 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7328 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7330 case UNSPEC_GOTTPOFF:
7331 case UNSPEC_GOTNTPOFF:
7332 case UNSPEC_INDNTPOFF:
7335 disp = XVECEXP (disp, 0, 0);
7336 return (GET_CODE (disp) == SYMBOL_REF
7337 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7339 disp = XVECEXP (disp, 0, 0);
7340 return (GET_CODE (disp) == SYMBOL_REF
7341 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7343 disp = XVECEXP (disp, 0, 0);
7344 return (GET_CODE (disp) == SYMBOL_REF
7345 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7351 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7352 memory address for an instruction. The MODE argument is the machine mode
7353 for the MEM expression that wants to use this address.
7355 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7356 convert common non-canonical forms to canonical form so that they will
7360 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7361 rtx addr, int strict)
7363 struct ix86_address parts;
7364 rtx base, index, disp;
7365 HOST_WIDE_INT scale;
7366 const char *reason = NULL;
7367 rtx reason_rtx = NULL_RTX;
7369 if (ix86_decompose_address (addr, &parts) <= 0)
7371 reason = "decomposition failed";
7376 index = parts.index;
7378 scale = parts.scale;
7380 /* Validate base register.
7382 Don't allow SUBREG's that span more than a word here. It can lead to spill
7383 failures when the base is one word out of a two word structure, which is
7384 represented internally as a DImode int. */
7393 else if (GET_CODE (base) == SUBREG
7394 && REG_P (SUBREG_REG (base))
7395 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7397 reg = SUBREG_REG (base);
7400 reason = "base is not a register";
7404 if (GET_MODE (base) != Pmode)
7406 reason = "base is not in Pmode";
7410 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7411 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7413 reason = "base is not valid";
7418 /* Validate index register.
7420 Don't allow SUBREG's that span more than a word here -- same as above. */
7429 else if (GET_CODE (index) == SUBREG
7430 && REG_P (SUBREG_REG (index))
7431 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7433 reg = SUBREG_REG (index);
7436 reason = "index is not a register";
7440 if (GET_MODE (index) != Pmode)
7442 reason = "index is not in Pmode";
7446 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7447 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7449 reason = "index is not valid";
7454 /* Validate scale factor. */
7457 reason_rtx = GEN_INT (scale);
7460 reason = "scale without index";
7464 if (scale != 2 && scale != 4 && scale != 8)
7466 reason = "scale is not a valid multiplier";
7471 /* Validate displacement. */
7476 if (GET_CODE (disp) == CONST
7477 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7478 switch (XINT (XEXP (disp, 0), 1))
7480 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7481 used. While ABI specify also 32bit relocations, we don't produce
7482 them at all and use IP relative instead. */
7485 gcc_assert (flag_pic);
7487 goto is_legitimate_pic;
7488 reason = "64bit address unspec";
7491 case UNSPEC_GOTPCREL:
7492 gcc_assert (flag_pic);
7493 goto is_legitimate_pic;
7495 case UNSPEC_GOTTPOFF:
7496 case UNSPEC_GOTNTPOFF:
7497 case UNSPEC_INDNTPOFF:
7503 reason = "invalid address unspec";
7507 else if (SYMBOLIC_CONST (disp)
7511 && MACHOPIC_INDIRECT
7512 && !machopic_operand_p (disp)
7518 if (TARGET_64BIT && (index || base))
7520 /* foo@dtpoff(%rX) is ok. */
7521 if (GET_CODE (disp) != CONST
7522 || GET_CODE (XEXP (disp, 0)) != PLUS
7523 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7524 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7525 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7526 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7528 reason = "non-constant pic memory reference";
7532 else if (! legitimate_pic_address_disp_p (disp))
7534 reason = "displacement is an invalid pic construct";
7538 /* This code used to verify that a symbolic pic displacement
7539 includes the pic_offset_table_rtx register.
7541 While this is good idea, unfortunately these constructs may
7542 be created by "adds using lea" optimization for incorrect
7551 This code is nonsensical, but results in addressing
7552 GOT table with pic_offset_table_rtx base. We can't
7553 just refuse it easily, since it gets matched by
7554 "addsi3" pattern, that later gets split to lea in the
7555 case output register differs from input. While this
7556 can be handled by separate addsi pattern for this case
7557 that never results in lea, this seems to be easier and
7558 correct fix for crash to disable this test. */
7560 else if (GET_CODE (disp) != LABEL_REF
7561 && !CONST_INT_P (disp)
7562 && (GET_CODE (disp) != CONST
7563 || !legitimate_constant_p (disp))
7564 && (GET_CODE (disp) != SYMBOL_REF
7565 || !legitimate_constant_p (disp)))
7567 reason = "displacement is not constant";
7570 else if (TARGET_64BIT
7571 && !x86_64_immediate_operand (disp, VOIDmode))
7573 reason = "displacement is out of range";
7578 /* Everything looks valid. */
7585 /* Return a unique alias set for the GOT. */
7587 static alias_set_type
7588 ix86_GOT_alias_set (void)
7590 static alias_set_type set = -1;
7592 set = new_alias_set ();
7596 /* Return a legitimate reference for ORIG (an address) using the
7597 register REG. If REG is 0, a new pseudo is generated.
7599 There are two types of references that must be handled:
7601 1. Global data references must load the address from the GOT, via
7602 the PIC reg. An insn is emitted to do this load, and the reg is
7605 2. Static data references, constant pool addresses, and code labels
7606 compute the address as an offset from the GOT, whose base is in
7607 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7608 differentiate them from global data objects. The returned
7609 address is the PIC reg + an unspec constant.
7611 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7612 reg also appears in the address. */
7615 legitimize_pic_address (rtx orig, rtx reg)
7622 if (TARGET_MACHO && !TARGET_64BIT)
7625 reg = gen_reg_rtx (Pmode);
7626 /* Use the generic Mach-O PIC machinery. */
7627 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7631 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7633 else if (TARGET_64BIT
7634 && ix86_cmodel != CM_SMALL_PIC
7635 && gotoff_operand (addr, Pmode))
7638 /* This symbol may be referenced via a displacement from the PIC
7639 base address (@GOTOFF). */
7641 if (reload_in_progress)
7642 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7643 if (GET_CODE (addr) == CONST)
7644 addr = XEXP (addr, 0);
7645 if (GET_CODE (addr) == PLUS)
7647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7649 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7655 tmpreg = gen_reg_rtx (Pmode);
7658 emit_move_insn (tmpreg, new_rtx);
7662 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7663 tmpreg, 1, OPTAB_DIRECT);
7666 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7668 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7670 /* This symbol may be referenced via a displacement from the PIC
7671 base address (@GOTOFF). */
7673 if (reload_in_progress)
7674 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7675 if (GET_CODE (addr) == CONST)
7676 addr = XEXP (addr, 0);
7677 if (GET_CODE (addr) == PLUS)
7679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7681 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7684 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7685 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7690 emit_move_insn (reg, new_rtx);
7694 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7695 /* We can't use @GOTOFF for text labels on VxWorks;
7696 see gotoff_operand. */
7697 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7701 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7702 return legitimize_dllimport_symbol (addr, true);
7703 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7705 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7707 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7708 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7712 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7716 new_rtx = gen_const_mem (Pmode, new_rtx);
7717 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7720 reg = gen_reg_rtx (Pmode);
7721 /* Use directly gen_movsi, otherwise the address is loaded
7722 into register for CSE. We don't want to CSE this addresses,
7723 instead we CSE addresses from the GOT table, so skip this. */
7724 emit_insn (gen_movsi (reg, new_rtx));
7729 /* This symbol must be referenced via a load from the
7730 Global Offset Table (@GOT). */
7732 if (reload_in_progress)
7733 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7735 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7737 new_rtx = force_reg (Pmode, new_rtx);
7738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7739 new_rtx = gen_const_mem (Pmode, new_rtx);
7740 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7743 reg = gen_reg_rtx (Pmode);
7744 emit_move_insn (reg, new_rtx);
7750 if (CONST_INT_P (addr)
7751 && !x86_64_immediate_operand (addr, VOIDmode))
7755 emit_move_insn (reg, addr);
7759 new_rtx = force_reg (Pmode, addr);
7761 else if (GET_CODE (addr) == CONST)
7763 addr = XEXP (addr, 0);
7765 /* We must match stuff we generate before. Assume the only
7766 unspecs that can get here are ours. Not that we could do
7767 anything with them anyway.... */
7768 if (GET_CODE (addr) == UNSPEC
7769 || (GET_CODE (addr) == PLUS
7770 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7772 gcc_assert (GET_CODE (addr) == PLUS);
7774 if (GET_CODE (addr) == PLUS)
7776 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7778 /* Check first to see if this is a constant offset from a @GOTOFF
7779 symbol reference. */
7780 if (gotoff_operand (op0, Pmode)
7781 && CONST_INT_P (op1))
7785 if (reload_in_progress)
7786 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7789 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7795 emit_move_insn (reg, new_rtx);
7801 if (INTVAL (op1) < -16*1024*1024
7802 || INTVAL (op1) >= 16*1024*1024)
7804 if (!x86_64_immediate_operand (op1, Pmode))
7805 op1 = force_reg (Pmode, op1);
7806 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7812 base = legitimize_pic_address (XEXP (addr, 0), reg);
7813 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7814 base == reg ? NULL_RTX : reg);
7816 if (CONST_INT_P (new_rtx))
7817 new_rtx = plus_constant (base, INTVAL (new_rtx));
7820 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7822 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7823 new_rtx = XEXP (new_rtx, 1);
7825 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7833 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7836 get_thread_pointer (int to_reg)
7840 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7844 reg = gen_reg_rtx (Pmode);
7845 insn = gen_rtx_SET (VOIDmode, reg, tp);
7846 insn = emit_insn (insn);
7851 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7852 false if we expect this to be used for a memory address and true if
7853 we expect to load the address into a register. */
7856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7858 rtx dest, base, off, pic, tp;
7863 case TLS_MODEL_GLOBAL_DYNAMIC:
7864 dest = gen_reg_rtx (Pmode);
7865 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7867 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7869 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7872 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7873 insns = get_insns ();
7876 RTL_CONST_CALL_P (insns) = 1;
7877 emit_libcall_block (insns, dest, rax, x);
7879 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7880 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7882 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7884 if (TARGET_GNU2_TLS)
7886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7892 case TLS_MODEL_LOCAL_DYNAMIC:
7893 base = gen_reg_rtx (Pmode);
7894 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7896 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7898 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7901 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7902 insns = get_insns ();
7905 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7906 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7907 RTL_CONST_CALL_P (insns) = 1;
7908 emit_libcall_block (insns, base, rax, note);
7910 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7911 emit_insn (gen_tls_local_dynamic_base_64 (base));
7913 emit_insn (gen_tls_local_dynamic_base_32 (base));
7915 if (TARGET_GNU2_TLS)
7917 rtx x = ix86_tls_module_base ();
7919 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7920 gen_rtx_MINUS (Pmode, x, tp));
7923 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7924 off = gen_rtx_CONST (Pmode, off);
7926 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7928 if (TARGET_GNU2_TLS)
7930 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7932 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7937 case TLS_MODEL_INITIAL_EXEC:
7941 type = UNSPEC_GOTNTPOFF;
7945 if (reload_in_progress)
7946 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7947 pic = pic_offset_table_rtx;
7948 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7950 else if (!TARGET_ANY_GNU_TLS)
7952 pic = gen_reg_rtx (Pmode);
7953 emit_insn (gen_set_got (pic));
7954 type = UNSPEC_GOTTPOFF;
7959 type = UNSPEC_INDNTPOFF;
7962 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7963 off = gen_rtx_CONST (Pmode, off);
7965 off = gen_rtx_PLUS (Pmode, pic, off);
7966 off = gen_const_mem (Pmode, off);
7967 set_mem_alias_set (off, ix86_GOT_alias_set ());
7969 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7971 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7972 off = force_reg (Pmode, off);
7973 return gen_rtx_PLUS (Pmode, base, off);
7977 base = get_thread_pointer (true);
7978 dest = gen_reg_rtx (Pmode);
7979 emit_insn (gen_subsi3 (dest, base, off));
7983 case TLS_MODEL_LOCAL_EXEC:
7984 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7985 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7986 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7987 off = gen_rtx_CONST (Pmode, off);
7989 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7991 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7992 return gen_rtx_PLUS (Pmode, base, off);
7996 base = get_thread_pointer (true);
7997 dest = gen_reg_rtx (Pmode);
7998 emit_insn (gen_subsi3 (dest, base, off));
8009 /* Create or return the unique __imp_DECL dllimport symbol corresponding
8012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8013 htab_t dllimport_map;
8016 get_dllimport_decl (tree decl)
8018 struct tree_map *h, in;
8022 size_t namelen, prefixlen;
8028 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8030 in.hash = htab_hash_pointer (decl);
8031 in.base.from = decl;
8032 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8033 h = (struct tree_map *) *loc;
8037 *loc = h = GGC_NEW (struct tree_map);
8039 h->base.from = decl;
8040 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8041 DECL_ARTIFICIAL (to) = 1;
8042 DECL_IGNORED_P (to) = 1;
8043 DECL_EXTERNAL (to) = 1;
8044 TREE_READONLY (to) = 1;
8046 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8047 name = targetm.strip_name_encoding (name);
8048 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8049 namelen = strlen (name);
8050 prefixlen = strlen (prefix);
8051 imp_name = (char *) alloca (namelen + prefixlen + 1);
8052 memcpy (imp_name, prefix, prefixlen);
8053 memcpy (imp_name + prefixlen, name, namelen + 1);
8055 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8056 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8057 SET_SYMBOL_REF_DECL (rtl, to);
8058 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8060 rtl = gen_const_mem (Pmode, rtl);
8061 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8063 SET_DECL_RTL (to, rtl);
8064 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8069 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8070 true if we require the result be a register. */
8073 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8078 gcc_assert (SYMBOL_REF_DECL (symbol));
8079 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8081 x = DECL_RTL (imp_decl);
8083 x = force_reg (Pmode, x);
8087 /* Try machine-dependent ways of modifying an illegitimate address
8088 to be legitimate. If we find one, return the new, valid address.
8089 This macro is used in only one place: `memory_address' in explow.c.
8091 OLDX is the address as it was before break_out_memory_refs was called.
8092 In some cases it is useful to look at this to decide what needs to be done.
8094 MODE and WIN are passed so that this macro can use
8095 GO_IF_LEGITIMATE_ADDRESS.
8097 It is always safe for this macro to do nothing. It exists to recognize
8098 opportunities to optimize the output.
8100 For the 80386, we handle X+REG by loading X into a register R and
8101 using R+REG. R will go in a general reg and indexing will be used.
8102 However, if REG is a broken-out memory address or multiplication,
8103 nothing needs to be done because REG can certainly go in a general reg.
8105 When -fpic is used, special handling is needed for symbolic references.
8106 See comments by legitimize_pic_address in i386.c for details. */
8109 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8114 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8116 return legitimize_tls_address (x, (enum tls_model) log, false);
8117 if (GET_CODE (x) == CONST
8118 && GET_CODE (XEXP (x, 0)) == PLUS
8119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8120 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8122 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8123 (enum tls_model) log, false);
8124 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8127 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8129 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8130 return legitimize_dllimport_symbol (x, true);
8131 if (GET_CODE (x) == CONST
8132 && GET_CODE (XEXP (x, 0)) == PLUS
8133 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8134 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8136 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8137 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8141 if (flag_pic && SYMBOLIC_CONST (x))
8142 return legitimize_pic_address (x, 0);
8144 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8145 if (GET_CODE (x) == ASHIFT
8146 && CONST_INT_P (XEXP (x, 1))
8147 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8150 log = INTVAL (XEXP (x, 1));
8151 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8152 GEN_INT (1 << log));
8155 if (GET_CODE (x) == PLUS)
8157 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8159 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8160 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8161 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8164 log = INTVAL (XEXP (XEXP (x, 0), 1));
8165 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8166 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8167 GEN_INT (1 << log));
8170 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8171 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8172 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8175 log = INTVAL (XEXP (XEXP (x, 1), 1));
8176 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8177 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8178 GEN_INT (1 << log));
8181 /* Put multiply first if it isn't already. */
8182 if (GET_CODE (XEXP (x, 1)) == MULT)
8184 rtx tmp = XEXP (x, 0);
8185 XEXP (x, 0) = XEXP (x, 1);
8190 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8191 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8192 created by virtual register instantiation, register elimination, and
8193 similar optimizations. */
8194 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8197 x = gen_rtx_PLUS (Pmode,
8198 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8199 XEXP (XEXP (x, 1), 0)),
8200 XEXP (XEXP (x, 1), 1));
8204 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8205 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8206 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8207 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8208 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8209 && CONSTANT_P (XEXP (x, 1)))
8212 rtx other = NULL_RTX;
8214 if (CONST_INT_P (XEXP (x, 1)))
8216 constant = XEXP (x, 1);
8217 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8219 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8221 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8222 other = XEXP (x, 1);
8230 x = gen_rtx_PLUS (Pmode,
8231 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8232 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8233 plus_constant (other, INTVAL (constant)));
8237 if (changed && legitimate_address_p (mode, x, FALSE))
8240 if (GET_CODE (XEXP (x, 0)) == MULT)
8243 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8246 if (GET_CODE (XEXP (x, 1)) == MULT)
8249 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8253 && REG_P (XEXP (x, 1))
8254 && REG_P (XEXP (x, 0)))
8257 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8260 x = legitimize_pic_address (x, 0);
8263 if (changed && legitimate_address_p (mode, x, FALSE))
8266 if (REG_P (XEXP (x, 0)))
8268 rtx temp = gen_reg_rtx (Pmode);
8269 rtx val = force_operand (XEXP (x, 1), temp);
8271 emit_move_insn (temp, val);
8277 else if (REG_P (XEXP (x, 1)))
8279 rtx temp = gen_reg_rtx (Pmode);
8280 rtx val = force_operand (XEXP (x, 0), temp);
8282 emit_move_insn (temp, val);
8292 /* Print an integer constant expression in assembler syntax. Addition
8293 and subtraction are the only arithmetic that may appear in these
8294 expressions. FILE is the stdio stream to write to, X is the rtx, and
8295 CODE is the operand print code from the output string. */
8298 output_pic_addr_const (FILE *file, rtx x, int code)
8302 switch (GET_CODE (x))
8305 gcc_assert (flag_pic);
8310 if (! TARGET_MACHO || TARGET_64BIT)
8311 output_addr_const (file, x);
8314 const char *name = XSTR (x, 0);
8316 /* Mark the decl as referenced so that cgraph will
8317 output the function. */
8318 if (SYMBOL_REF_DECL (x))
8319 mark_decl_referenced (SYMBOL_REF_DECL (x));
8322 if (MACHOPIC_INDIRECT
8323 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8324 name = machopic_indirection_name (x, /*stub_p=*/true);
8326 assemble_name (file, name);
8328 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8329 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8330 fputs ("@PLT", file);
8337 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8338 assemble_name (asm_out_file, buf);
8342 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8346 /* This used to output parentheses around the expression,
8347 but that does not work on the 386 (either ATT or BSD assembler). */
8348 output_pic_addr_const (file, XEXP (x, 0), code);
8352 if (GET_MODE (x) == VOIDmode)
8354 /* We can use %d if the number is <32 bits and positive. */
8355 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8356 fprintf (file, "0x%lx%08lx",
8357 (unsigned long) CONST_DOUBLE_HIGH (x),
8358 (unsigned long) CONST_DOUBLE_LOW (x));
8360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8363 /* We can't handle floating point constants;
8364 PRINT_OPERAND must handle them. */
8365 output_operand_lossage ("floating constant misused");
8369 /* Some assemblers need integer constants to appear first. */
8370 if (CONST_INT_P (XEXP (x, 0)))
8372 output_pic_addr_const (file, XEXP (x, 0), code);
8374 output_pic_addr_const (file, XEXP (x, 1), code);
8378 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8379 output_pic_addr_const (file, XEXP (x, 1), code);
8381 output_pic_addr_const (file, XEXP (x, 0), code);
8387 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8388 output_pic_addr_const (file, XEXP (x, 0), code);
8390 output_pic_addr_const (file, XEXP (x, 1), code);
8392 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8396 gcc_assert (XVECLEN (x, 0) == 1);
8397 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8398 switch (XINT (x, 1))
8401 fputs ("@GOT", file);
8404 fputs ("@GOTOFF", file);
8407 fputs ("@PLTOFF", file);
8409 case UNSPEC_GOTPCREL:
8410 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8411 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8413 case UNSPEC_GOTTPOFF:
8414 /* FIXME: This might be @TPOFF in Sun ld too. */
8415 fputs ("@GOTTPOFF", file);
8418 fputs ("@TPOFF", file);
8422 fputs ("@TPOFF", file);
8424 fputs ("@NTPOFF", file);
8427 fputs ("@DTPOFF", file);
8429 case UNSPEC_GOTNTPOFF:
8431 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8432 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8434 fputs ("@GOTNTPOFF", file);
8436 case UNSPEC_INDNTPOFF:
8437 fputs ("@INDNTPOFF", file);
8440 output_operand_lossage ("invalid UNSPEC as operand");
8446 output_operand_lossage ("invalid expression as operand");
8450 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8451 We need to emit DTP-relative relocations. */
8453 static void ATTRIBUTE_UNUSED
8454 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8456 fputs (ASM_LONG, file);
8457 output_addr_const (file, x);
8458 fputs ("@DTPOFF", file);
8464 fputs (", 0", file);
8471 /* In the name of slightly smaller debug output, and to cater to
8472 general assembler lossage, recognize PIC+GOTOFF and turn it back
8473 into a direct symbol reference.
8475 On Darwin, this is necessary to avoid a crash, because Darwin
8476 has a different PIC label for each routine but the DWARF debugging
8477 information is not associated with any particular routine, so it's
8478 necessary to remove references to the PIC label from RTL stored by
8479 the DWARF output code. */
8482 ix86_delegitimize_address (rtx orig_x)
8485 /* reg_addend is NULL or a multiple of some register. */
8486 rtx reg_addend = NULL_RTX;
8487 /* const_addend is NULL or a const_int. */
8488 rtx const_addend = NULL_RTX;
8489 /* This is the result, or NULL. */
8490 rtx result = NULL_RTX;
8497 if (GET_CODE (x) != CONST
8498 || GET_CODE (XEXP (x, 0)) != UNSPEC
8499 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8502 return XVECEXP (XEXP (x, 0), 0, 0);
8505 if (GET_CODE (x) != PLUS
8506 || GET_CODE (XEXP (x, 1)) != CONST)
8509 if (REG_P (XEXP (x, 0))
8510 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8511 /* %ebx + GOT/GOTOFF */
8513 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8515 /* %ebx + %reg * scale + GOT/GOTOFF */
8516 reg_addend = XEXP (x, 0);
8517 if (REG_P (XEXP (reg_addend, 0))
8518 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8519 reg_addend = XEXP (reg_addend, 1);
8520 else if (REG_P (XEXP (reg_addend, 1))
8521 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8522 reg_addend = XEXP (reg_addend, 0);
8525 if (!REG_P (reg_addend)
8526 && GET_CODE (reg_addend) != MULT
8527 && GET_CODE (reg_addend) != ASHIFT)
8533 x = XEXP (XEXP (x, 1), 0);
8534 if (GET_CODE (x) == PLUS
8535 && CONST_INT_P (XEXP (x, 1)))
8537 const_addend = XEXP (x, 1);
8541 if (GET_CODE (x) == UNSPEC
8542 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8543 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8544 result = XVECEXP (x, 0, 0);
8546 if (TARGET_MACHO && darwin_local_data_pic (x)
8548 result = XEXP (x, 0);
8554 result = gen_rtx_PLUS (Pmode, result, const_addend);
8556 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8560 /* If X is a machine specific address (i.e. a symbol or label being
8561 referenced as a displacement from the GOT implemented using an
8562 UNSPEC), then return the base term. Otherwise return X. */
8565 ix86_find_base_term (rtx x)
8571 if (GET_CODE (x) != CONST)
8574 if (GET_CODE (term) == PLUS
8575 && (CONST_INT_P (XEXP (term, 1))
8576 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8577 term = XEXP (term, 0);
8578 if (GET_CODE (term) != UNSPEC
8579 || XINT (term, 1) != UNSPEC_GOTPCREL)
8582 term = XVECEXP (term, 0, 0);
8584 if (GET_CODE (term) != SYMBOL_REF
8585 && GET_CODE (term) != LABEL_REF)
8591 term = ix86_delegitimize_address (x);
8593 if (GET_CODE (term) != SYMBOL_REF
8594 && GET_CODE (term) != LABEL_REF)
8601 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8606 if (mode == CCFPmode || mode == CCFPUmode)
8608 enum rtx_code second_code, bypass_code;
8609 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8610 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8611 code = ix86_fp_compare_code_to_integer (code);
8615 code = reverse_condition (code);
8666 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8670 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8671 Those same assemblers have the same but opposite lossage on cmov. */
8673 suffix = fp ? "nbe" : "a";
8674 else if (mode == CCCmode)
8697 gcc_assert (mode == CCmode || mode == CCCmode);
8719 gcc_assert (mode == CCmode || mode == CCCmode);
8720 suffix = fp ? "nb" : "ae";
8723 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8730 else if (mode == CCCmode)
8731 suffix = fp ? "nb" : "ae";
8736 suffix = fp ? "u" : "p";
8739 suffix = fp ? "nu" : "np";
8744 fputs (suffix, file);
8747 /* Print the name of register X to FILE based on its machine mode and number.
8748 If CODE is 'w', pretend the mode is HImode.
8749 If CODE is 'b', pretend the mode is QImode.
8750 If CODE is 'k', pretend the mode is SImode.
8751 If CODE is 'q', pretend the mode is DImode.
8752 If CODE is 'h', pretend the reg is the 'high' byte register.
8753 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8756 print_reg (rtx x, int code, FILE *file)
8758 gcc_assert (x == pc_rtx
8759 || (REGNO (x) != ARG_POINTER_REGNUM
8760 && REGNO (x) != FRAME_POINTER_REGNUM
8761 && REGNO (x) != FLAGS_REG
8762 && REGNO (x) != FPSR_REG
8763 && REGNO (x) != FPCR_REG));
8765 if (ASSEMBLER_DIALECT == ASM_ATT)
8770 gcc_assert (TARGET_64BIT);
8771 fputs ("rip", file);
8775 if (code == 'w' || MMX_REG_P (x))
8777 else if (code == 'b')
8779 else if (code == 'k')
8781 else if (code == 'q')
8783 else if (code == 'y')
8785 else if (code == 'h')
8788 code = GET_MODE_SIZE (GET_MODE (x));
8790 /* Irritatingly, AMD extended registers use different naming convention
8791 from the normal registers. */
8792 if (REX_INT_REG_P (x))
8794 gcc_assert (TARGET_64BIT);
8798 error ("extended registers have no high halves");
8801 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8804 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8807 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8810 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8813 error ("unsupported operand size for extended register");
8821 if (STACK_TOP_P (x))
8823 fputs ("st(0)", file);
8830 if (! ANY_FP_REG_P (x))
8831 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8836 fputs (hi_reg_name[REGNO (x)], file);
8839 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8841 fputs (qi_reg_name[REGNO (x)], file);
8844 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8846 fputs (qi_high_reg_name[REGNO (x)], file);
8853 /* Locate some local-dynamic symbol still in use by this function
8854 so that we can print its name in some tls_local_dynamic_base
8858 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8862 if (GET_CODE (x) == SYMBOL_REF
8863 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8865 cfun->machine->some_ld_name = XSTR (x, 0);
8873 get_some_local_dynamic_name (void)
8877 if (cfun->machine->some_ld_name)
8878 return cfun->machine->some_ld_name;
8880 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8882 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8883 return cfun->machine->some_ld_name;
8889 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8890 C -- print opcode suffix for set/cmov insn.
8891 c -- like C, but print reversed condition
8892 F,f -- likewise, but for floating-point.
8893 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8895 R -- print the prefix for register names.
8896 z -- print the opcode suffix for the size of the current operand.
8897 * -- print a star (in certain assembler syntax)
8898 A -- print an absolute memory reference.
8899 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8900 s -- print a shift double count, followed by the assemblers argument
8902 b -- print the QImode name of the register for the indicated operand.
8903 %b0 would print %al if operands[0] is reg 0.
8904 w -- likewise, print the HImode name of the register.
8905 k -- likewise, print the SImode name of the register.
8906 q -- likewise, print the DImode name of the register.
8907 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8908 y -- print "st(0)" instead of "st" as a register.
8909 D -- print condition for SSE cmp instruction.
8910 P -- if PIC, print an @PLT suffix.
8911 X -- don't print any sort of PIC '@' suffix for a symbol.
8912 & -- print some in-use local-dynamic symbol name.
8913 H -- print a memory address offset by 8; used for sse high-parts
8914 Y -- print condition for SSE5 com* instruction.
8915 + -- print a branch hint as 'cs' or 'ds' prefix
8916 ; -- print a semicolon (after prefixes due to bug in older gas).
8920 print_operand (FILE *file, rtx x, int code)
8927 if (ASSEMBLER_DIALECT == ASM_ATT)
8932 assemble_name (file, get_some_local_dynamic_name ());
8936 switch (ASSEMBLER_DIALECT)
8943 /* Intel syntax. For absolute addresses, registers should not
8944 be surrounded by braces. */
8948 PRINT_OPERAND (file, x, 0);
8958 PRINT_OPERAND (file, x, 0);
8963 if (ASSEMBLER_DIALECT == ASM_ATT)
8968 if (ASSEMBLER_DIALECT == ASM_ATT)
8973 if (ASSEMBLER_DIALECT == ASM_ATT)
8978 if (ASSEMBLER_DIALECT == ASM_ATT)
8983 if (ASSEMBLER_DIALECT == ASM_ATT)
8988 if (ASSEMBLER_DIALECT == ASM_ATT)
8993 /* 387 opcodes don't get size suffixes if the operands are
8995 if (STACK_REG_P (x))
8998 /* Likewise if using Intel opcodes. */
8999 if (ASSEMBLER_DIALECT == ASM_INTEL)
9002 /* This is the size of op from size of operand. */
9003 switch (GET_MODE_SIZE (GET_MODE (x)))
9012 #ifdef HAVE_GAS_FILDS_FISTS
9022 if (GET_MODE (x) == SFmode)
9037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9039 #ifdef GAS_MNEMONICS
9065 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9067 PRINT_OPERAND (file, x, 0);
9073 /* Little bit of braindamage here. The SSE compare instructions
9074 does use completely different names for the comparisons that the
9075 fp conditional moves. */
9076 switch (GET_CODE (x))
9091 fputs ("unord", file);
9095 fputs ("neq", file);
9099 fputs ("nlt", file);
9103 fputs ("nle", file);
9106 fputs ("ord", file);
9113 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9114 if (ASSEMBLER_DIALECT == ASM_ATT)
9116 switch (GET_MODE (x))
9118 case HImode: putc ('w', file); break;
9120 case SFmode: putc ('l', file); break;
9122 case DFmode: putc ('q', file); break;
9123 default: gcc_unreachable ();
9130 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9133 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9134 if (ASSEMBLER_DIALECT == ASM_ATT)
9137 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9140 /* Like above, but reverse condition */
9142 /* Check to see if argument to %c is really a constant
9143 and not a condition code which needs to be reversed. */
9144 if (!COMPARISON_P (x))
9146 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9152 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9153 if (ASSEMBLER_DIALECT == ASM_ATT)
9156 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9160 /* It doesn't actually matter what mode we use here, as we're
9161 only going to use this for printing. */
9162 x = adjust_address_nv (x, DImode, 8);
9169 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9172 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9175 int pred_val = INTVAL (XEXP (x, 0));
9177 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9178 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9180 int taken = pred_val > REG_BR_PROB_BASE / 2;
9181 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9183 /* Emit hints only in the case default branch prediction
9184 heuristics would fail. */
9185 if (taken != cputaken)
9187 /* We use 3e (DS) prefix for taken branches and
9188 2e (CS) prefix for not taken branches. */
9190 fputs ("ds ; ", file);
9192 fputs ("cs ; ", file);
9200 switch (GET_CODE (x))
9203 fputs ("neq", file);
9210 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9214 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9225 fputs ("unord", file);
9228 fputs ("ord", file);
9231 fputs ("ueq", file);
9234 fputs ("nlt", file);
9237 fputs ("nle", file);
9240 fputs ("ule", file);
9243 fputs ("ult", file);
9246 fputs ("une", file);
9255 fputs (" ; ", file);
9262 output_operand_lossage ("invalid operand code '%c'", code);
9267 print_reg (x, code, file);
9271 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9272 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9273 && GET_MODE (x) != BLKmode)
9276 switch (GET_MODE_SIZE (GET_MODE (x)))
9278 case 1: size = "BYTE"; break;
9279 case 2: size = "WORD"; break;
9280 case 4: size = "DWORD"; break;
9281 case 8: size = "QWORD"; break;
9282 case 12: size = "XWORD"; break;
9284 if (GET_MODE (x) == XFmode)
9293 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9296 else if (code == 'w')
9298 else if (code == 'k')
9302 fputs (" PTR ", file);
9306 /* Avoid (%rip) for call operands. */
9307 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9308 && !CONST_INT_P (x))
9309 output_addr_const (file, x);
9310 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9311 output_operand_lossage ("invalid constraints for operand");
9316 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9321 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9322 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9324 if (ASSEMBLER_DIALECT == ASM_ATT)
9326 fprintf (file, "0x%08lx", (long unsigned int) l);
9329 /* These float cases don't actually occur as immediate operands. */
9330 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9334 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9335 fprintf (file, "%s", dstr);
9338 else if (GET_CODE (x) == CONST_DOUBLE
9339 && GET_MODE (x) == XFmode)
9343 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9344 fprintf (file, "%s", dstr);
9349 /* We have patterns that allow zero sets of memory, for instance.
9350 In 64-bit mode, we should probably support all 8-byte vectors,
9351 since we can in fact encode that into an immediate. */
9352 if (GET_CODE (x) == CONST_VECTOR)
9354 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9360 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9362 if (ASSEMBLER_DIALECT == ASM_ATT)
9365 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9366 || GET_CODE (x) == LABEL_REF)
9368 if (ASSEMBLER_DIALECT == ASM_ATT)
9371 fputs ("OFFSET FLAT:", file);
9374 if (CONST_INT_P (x))
9375 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9377 output_pic_addr_const (file, x, code);
9379 output_addr_const (file, x);
9383 /* Print a memory operand whose address is ADDR. */
9386 print_operand_address (FILE *file, rtx addr)
9388 struct ix86_address parts;
9389 rtx base, index, disp;
9391 int ok = ix86_decompose_address (addr, &parts);
9396 index = parts.index;
9398 scale = parts.scale;
9406 if (ASSEMBLER_DIALECT == ASM_ATT)
9408 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9414 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9415 if (TARGET_64BIT && !base && !index)
9419 if (GET_CODE (disp) == CONST
9420 && GET_CODE (XEXP (disp, 0)) == PLUS
9421 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9422 symbol = XEXP (XEXP (disp, 0), 0);
9424 if (GET_CODE (symbol) == LABEL_REF
9425 || (GET_CODE (symbol) == SYMBOL_REF
9426 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9429 if (!base && !index)
9431 /* Displacement only requires special attention. */
9433 if (CONST_INT_P (disp))
9435 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9436 fputs ("ds:", file);
9437 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9440 output_pic_addr_const (file, disp, 0);
9442 output_addr_const (file, disp);
9446 if (ASSEMBLER_DIALECT == ASM_ATT)
9451 output_pic_addr_const (file, disp, 0);
9452 else if (GET_CODE (disp) == LABEL_REF)
9453 output_asm_label (disp);
9455 output_addr_const (file, disp);
9460 print_reg (base, 0, file);
9464 print_reg (index, 0, file);
9466 fprintf (file, ",%d", scale);
9472 rtx offset = NULL_RTX;
9476 /* Pull out the offset of a symbol; print any symbol itself. */
9477 if (GET_CODE (disp) == CONST
9478 && GET_CODE (XEXP (disp, 0)) == PLUS
9479 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9481 offset = XEXP (XEXP (disp, 0), 1);
9482 disp = gen_rtx_CONST (VOIDmode,
9483 XEXP (XEXP (disp, 0), 0));
9487 output_pic_addr_const (file, disp, 0);
9488 else if (GET_CODE (disp) == LABEL_REF)
9489 output_asm_label (disp);
9490 else if (CONST_INT_P (disp))
9493 output_addr_const (file, disp);
9499 print_reg (base, 0, file);
9502 if (INTVAL (offset) >= 0)
9504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9508 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9515 print_reg (index, 0, file);
9517 fprintf (file, "*%d", scale);
9525 output_addr_const_extra (FILE *file, rtx x)
9529 if (GET_CODE (x) != UNSPEC)
9532 op = XVECEXP (x, 0, 0);
9533 switch (XINT (x, 1))
9535 case UNSPEC_GOTTPOFF:
9536 output_addr_const (file, op);
9537 /* FIXME: This might be @TPOFF in Sun ld. */
9538 fputs ("@GOTTPOFF", file);
9541 output_addr_const (file, op);
9542 fputs ("@TPOFF", file);
9545 output_addr_const (file, op);
9547 fputs ("@TPOFF", file);
9549 fputs ("@NTPOFF", file);
9552 output_addr_const (file, op);
9553 fputs ("@DTPOFF", file);
9555 case UNSPEC_GOTNTPOFF:
9556 output_addr_const (file, op);
9558 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9559 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9561 fputs ("@GOTNTPOFF", file);
9563 case UNSPEC_INDNTPOFF:
9564 output_addr_const (file, op);
9565 fputs ("@INDNTPOFF", file);
9575 /* Split one or more DImode RTL references into pairs of SImode
9576 references. The RTL can be REG, offsettable MEM, integer constant, or
9577 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9578 split and "num" is its length. lo_half and hi_half are output arrays
9579 that parallel "operands". */
9582 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9586 rtx op = operands[num];
9588 /* simplify_subreg refuse to split volatile memory addresses,
9589 but we still have to handle it. */
9592 lo_half[num] = adjust_address (op, SImode, 0);
9593 hi_half[num] = adjust_address (op, SImode, 4);
9597 lo_half[num] = simplify_gen_subreg (SImode, op,
9598 GET_MODE (op) == VOIDmode
9599 ? DImode : GET_MODE (op), 0);
9600 hi_half[num] = simplify_gen_subreg (SImode, op,
9601 GET_MODE (op) == VOIDmode
9602 ? DImode : GET_MODE (op), 4);
9606 /* Split one or more TImode RTL references into pairs of DImode
9607 references. The RTL can be REG, offsettable MEM, integer constant, or
9608 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9609 split and "num" is its length. lo_half and hi_half are output arrays
9610 that parallel "operands". */
9613 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9617 rtx op = operands[num];
9619 /* simplify_subreg refuse to split volatile memory addresses, but we
9620 still have to handle it. */
9623 lo_half[num] = adjust_address (op, DImode, 0);
9624 hi_half[num] = adjust_address (op, DImode, 8);
9628 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9629 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9634 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9635 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9636 is the expression of the binary operation. The output may either be
9637 emitted here, or returned to the caller, like all output_* functions.
9639 There is no guarantee that the operands are the same mode, as they
9640 might be within FLOAT or FLOAT_EXTEND expressions. */
9642 #ifndef SYSV386_COMPAT
9643 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9644 wants to fix the assemblers because that causes incompatibility
9645 with gcc. No-one wants to fix gcc because that causes
9646 incompatibility with assemblers... You can use the option of
9647 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9648 #define SYSV386_COMPAT 1
9652 output_387_binary_op (rtx insn, rtx *operands)
9654 static char buf[30];
9657 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9659 #ifdef ENABLE_CHECKING
9660 /* Even if we do not want to check the inputs, this documents input
9661 constraints. Which helps in understanding the following code. */
9662 if (STACK_REG_P (operands[0])
9663 && ((REG_P (operands[1])
9664 && REGNO (operands[0]) == REGNO (operands[1])
9665 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9666 || (REG_P (operands[2])
9667 && REGNO (operands[0]) == REGNO (operands[2])
9668 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9669 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9672 gcc_assert (is_sse);
9675 switch (GET_CODE (operands[3]))
9678 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9679 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9687 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9688 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9696 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9697 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9705 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9706 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9720 if (GET_MODE (operands[0]) == SFmode)
9721 strcat (buf, "ss\t{%2, %0|%0, %2}");
9723 strcat (buf, "sd\t{%2, %0|%0, %2}");
9728 switch (GET_CODE (operands[3]))
9732 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9734 rtx temp = operands[2];
9735 operands[2] = operands[1];
9739 /* know operands[0] == operands[1]. */
9741 if (MEM_P (operands[2]))
9747 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9749 if (STACK_TOP_P (operands[0]))
9750 /* How is it that we are storing to a dead operand[2]?
9751 Well, presumably operands[1] is dead too. We can't
9752 store the result to st(0) as st(0) gets popped on this
9753 instruction. Instead store to operands[2] (which I
9754 think has to be st(1)). st(1) will be popped later.
9755 gcc <= 2.8.1 didn't have this check and generated
9756 assembly code that the Unixware assembler rejected. */
9757 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9759 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9763 if (STACK_TOP_P (operands[0]))
9764 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9766 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9771 if (MEM_P (operands[1]))
9777 if (MEM_P (operands[2]))
9783 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9786 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9787 derived assemblers, confusingly reverse the direction of
9788 the operation for fsub{r} and fdiv{r} when the
9789 destination register is not st(0). The Intel assembler
9790 doesn't have this brain damage. Read !SYSV386_COMPAT to
9791 figure out what the hardware really does. */
9792 if (STACK_TOP_P (operands[0]))
9793 p = "{p\t%0, %2|rp\t%2, %0}";
9795 p = "{rp\t%2, %0|p\t%0, %2}";
9797 if (STACK_TOP_P (operands[0]))
9798 /* As above for fmul/fadd, we can't store to st(0). */
9799 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9801 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9806 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9809 if (STACK_TOP_P (operands[0]))
9810 p = "{rp\t%0, %1|p\t%1, %0}";
9812 p = "{p\t%1, %0|rp\t%0, %1}";
9814 if (STACK_TOP_P (operands[0]))
9815 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9817 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9822 if (STACK_TOP_P (operands[0]))
9824 if (STACK_TOP_P (operands[1]))
9825 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9827 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9830 else if (STACK_TOP_P (operands[1]))
9833 p = "{\t%1, %0|r\t%0, %1}";
9835 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9841 p = "{r\t%2, %0|\t%0, %2}";
9843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9856 /* Return needed mode for entity in optimize_mode_switching pass. */
9859 ix86_mode_needed (int entity, rtx insn)
9861 enum attr_i387_cw mode;
9863 /* The mode UNINITIALIZED is used to store control word after a
9864 function call or ASM pattern. The mode ANY specify that function
9865 has no requirements on the control word and make no changes in the
9866 bits we are interested in. */
9869 || (NONJUMP_INSN_P (insn)
9870 && (asm_noperands (PATTERN (insn)) >= 0
9871 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9872 return I387_CW_UNINITIALIZED;
9874 if (recog_memoized (insn) < 0)
9877 mode = get_attr_i387_cw (insn);
9882 if (mode == I387_CW_TRUNC)
9887 if (mode == I387_CW_FLOOR)
9892 if (mode == I387_CW_CEIL)
9897 if (mode == I387_CW_MASK_PM)
9908 /* Output code to initialize control word copies used by trunc?f?i and
9909 rounding patterns. CURRENT_MODE is set to current control word,
9910 while NEW_MODE is set to new control word. */
9913 emit_i387_cw_initialization (int mode)
9915 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9918 enum ix86_stack_slot slot;
9920 rtx reg = gen_reg_rtx (HImode);
9922 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9923 emit_move_insn (reg, copy_rtx (stored_mode));
9925 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9930 /* round toward zero (truncate) */
9931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9932 slot = SLOT_CW_TRUNC;
9936 /* round down toward -oo */
9937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9939 slot = SLOT_CW_FLOOR;
9943 /* round up toward +oo */
9944 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9945 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9946 slot = SLOT_CW_CEIL;
9949 case I387_CW_MASK_PM:
9950 /* mask precision exception for nearbyint() */
9951 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9952 slot = SLOT_CW_MASK_PM;
9964 /* round toward zero (truncate) */
9965 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9966 slot = SLOT_CW_TRUNC;
9970 /* round down toward -oo */
9971 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9972 slot = SLOT_CW_FLOOR;
9976 /* round up toward +oo */
9977 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9978 slot = SLOT_CW_CEIL;
9981 case I387_CW_MASK_PM:
9982 /* mask precision exception for nearbyint() */
9983 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9984 slot = SLOT_CW_MASK_PM;
9992 gcc_assert (slot < MAX_386_STACK_LOCALS);
9994 new_mode = assign_386_stack_local (HImode, slot);
9995 emit_move_insn (new_mode, reg);
9998 /* Output code for INSN to convert a float to a signed int. OPERANDS
9999 are the insn operands. The output may be [HSD]Imode and the input
10000 operand may be [SDX]Fmode. */
10003 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10005 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10006 int dimode_p = GET_MODE (operands[0]) == DImode;
10007 int round_mode = get_attr_i387_cw (insn);
10009 /* Jump through a hoop or two for DImode, since the hardware has no
10010 non-popping instruction. We used to do this a different way, but
10011 that was somewhat fragile and broke with post-reload splitters. */
10012 if ((dimode_p || fisttp) && !stack_top_dies)
10013 output_asm_insn ("fld\t%y1", operands);
10015 gcc_assert (STACK_TOP_P (operands[1]));
10016 gcc_assert (MEM_P (operands[0]));
10017 gcc_assert (GET_MODE (operands[1]) != TFmode);
10020 output_asm_insn ("fisttp%z0\t%0", operands);
10023 if (round_mode != I387_CW_ANY)
10024 output_asm_insn ("fldcw\t%3", operands);
10025 if (stack_top_dies || dimode_p)
10026 output_asm_insn ("fistp%z0\t%0", operands);
10028 output_asm_insn ("fist%z0\t%0", operands);
10029 if (round_mode != I387_CW_ANY)
10030 output_asm_insn ("fldcw\t%2", operands);
10036 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10037 have the values zero or one, indicates the ffreep insn's operand
10038 from the OPERANDS array. */
10040 static const char *
10041 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10043 if (TARGET_USE_FFREEP)
10044 #if HAVE_AS_IX86_FFREEP
10045 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10048 static char retval[] = ".word\t0xc_df";
10049 int regno = REGNO (operands[opno]);
10051 gcc_assert (FP_REGNO_P (regno));
10053 retval[9] = '0' + (regno - FIRST_STACK_REG);
10058 return opno ? "fstp\t%y1" : "fstp\t%y0";
10062 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10063 should be used. UNORDERED_P is true when fucom should be used. */
10066 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10068 int stack_top_dies;
10069 rtx cmp_op0, cmp_op1;
10070 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10074 cmp_op0 = operands[0];
10075 cmp_op1 = operands[1];
10079 cmp_op0 = operands[1];
10080 cmp_op1 = operands[2];
10085 if (GET_MODE (operands[0]) == SFmode)
10087 return "ucomiss\t{%1, %0|%0, %1}";
10089 return "comiss\t{%1, %0|%0, %1}";
10092 return "ucomisd\t{%1, %0|%0, %1}";
10094 return "comisd\t{%1, %0|%0, %1}";
10097 gcc_assert (STACK_TOP_P (cmp_op0));
10099 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10101 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10103 if (stack_top_dies)
10105 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10106 return output_387_ffreep (operands, 1);
10109 return "ftst\n\tfnstsw\t%0";
10112 if (STACK_REG_P (cmp_op1)
10114 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10115 && REGNO (cmp_op1) != FIRST_STACK_REG)
10117 /* If both the top of the 387 stack dies, and the other operand
10118 is also a stack register that dies, then this must be a
10119 `fcompp' float compare */
10123 /* There is no double popping fcomi variant. Fortunately,
10124 eflags is immune from the fstp's cc clobbering. */
10126 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10128 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10129 return output_387_ffreep (operands, 0);
10134 return "fucompp\n\tfnstsw\t%0";
10136 return "fcompp\n\tfnstsw\t%0";
10141 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10143 static const char * const alt[16] =
10145 "fcom%z2\t%y2\n\tfnstsw\t%0",
10146 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10147 "fucom%z2\t%y2\n\tfnstsw\t%0",
10148 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10150 "ficom%z2\t%y2\n\tfnstsw\t%0",
10151 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10155 "fcomi\t{%y1, %0|%0, %y1}",
10156 "fcomip\t{%y1, %0|%0, %y1}",
10157 "fucomi\t{%y1, %0|%0, %y1}",
10158 "fucomip\t{%y1, %0|%0, %y1}",
10169 mask = eflags_p << 3;
10170 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10171 mask |= unordered_p << 1;
10172 mask |= stack_top_dies;
10174 gcc_assert (mask < 16);
10183 ix86_output_addr_vec_elt (FILE *file, int value)
10185 const char *directive = ASM_LONG;
10189 directive = ASM_QUAD;
10191 gcc_assert (!TARGET_64BIT);
10194 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10198 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10200 const char *directive = ASM_LONG;
10203 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10204 directive = ASM_QUAD;
10206 gcc_assert (!TARGET_64BIT);
10208 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10209 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10210 fprintf (file, "%s%s%d-%s%d\n",
10211 directive, LPREFIX, value, LPREFIX, rel);
10212 else if (HAVE_AS_GOTOFF_IN_DATA)
10213 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10215 else if (TARGET_MACHO)
10217 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10218 machopic_output_function_base_name (file);
10219 fprintf(file, "\n");
10223 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10224 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10227 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10231 ix86_expand_clear (rtx dest)
10235 /* We play register width games, which are only valid after reload. */
10236 gcc_assert (reload_completed);
10238 /* Avoid HImode and its attendant prefix byte. */
10239 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10240 dest = gen_rtx_REG (SImode, REGNO (dest));
10241 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10243 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10244 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10246 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10247 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10253 /* X is an unchanging MEM. If it is a constant pool reference, return
10254 the constant pool rtx, else NULL. */
10257 maybe_get_pool_constant (rtx x)
10259 x = ix86_delegitimize_address (XEXP (x, 0));
10261 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10262 return get_pool_constant (x);
10268 ix86_expand_move (enum machine_mode mode, rtx operands[])
10271 enum tls_model model;
10276 if (GET_CODE (op1) == SYMBOL_REF)
10278 model = SYMBOL_REF_TLS_MODEL (op1);
10281 op1 = legitimize_tls_address (op1, model, true);
10282 op1 = force_operand (op1, op0);
10286 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10287 && SYMBOL_REF_DLLIMPORT_P (op1))
10288 op1 = legitimize_dllimport_symbol (op1, false);
10290 else if (GET_CODE (op1) == CONST
10291 && GET_CODE (XEXP (op1, 0)) == PLUS
10292 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10294 rtx addend = XEXP (XEXP (op1, 0), 1);
10295 rtx symbol = XEXP (XEXP (op1, 0), 0);
10298 model = SYMBOL_REF_TLS_MODEL (symbol);
10300 tmp = legitimize_tls_address (symbol, model, true);
10301 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10302 && SYMBOL_REF_DLLIMPORT_P (symbol))
10303 tmp = legitimize_dllimport_symbol (symbol, true);
10307 tmp = force_operand (tmp, NULL);
10308 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10309 op0, 1, OPTAB_DIRECT);
10315 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10317 if (TARGET_MACHO && !TARGET_64BIT)
10322 rtx temp = ((reload_in_progress
10323 || ((op0 && REG_P (op0))
10325 ? op0 : gen_reg_rtx (Pmode));
10326 op1 = machopic_indirect_data_reference (op1, temp);
10327 op1 = machopic_legitimize_pic_address (op1, mode,
10328 temp == op1 ? 0 : temp);
10330 else if (MACHOPIC_INDIRECT)
10331 op1 = machopic_indirect_data_reference (op1, 0);
10339 op1 = force_reg (Pmode, op1);
10340 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10342 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10343 op1 = legitimize_pic_address (op1, reg);
10352 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10353 || !push_operand (op0, mode))
10355 op1 = force_reg (mode, op1);
10357 if (push_operand (op0, mode)
10358 && ! general_no_elim_operand (op1, mode))
10359 op1 = copy_to_mode_reg (mode, op1);
10361 /* Force large constants in 64bit compilation into register
10362 to get them CSEed. */
10363 if (can_create_pseudo_p ()
10364 && (mode == DImode) && TARGET_64BIT
10365 && immediate_operand (op1, mode)
10366 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10367 && !register_operand (op0, mode)
10369 op1 = copy_to_mode_reg (mode, op1);
10371 if (can_create_pseudo_p ()
10372 && FLOAT_MODE_P (mode)
10373 && GET_CODE (op1) == CONST_DOUBLE)
10375 /* If we are loading a floating point constant to a register,
10376 force the value to memory now, since we'll get better code
10377 out the back end. */
10379 op1 = validize_mem (force_const_mem (mode, op1));
10380 if (!register_operand (op0, mode))
10382 rtx temp = gen_reg_rtx (mode);
10383 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10384 emit_move_insn (op0, temp);
10390 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10394 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10396 rtx op0 = operands[0], op1 = operands[1];
10397 unsigned int align = GET_MODE_ALIGNMENT (mode);
10399 /* Force constants other than zero into memory. We do not know how
10400 the instructions used to build constants modify the upper 64 bits
10401 of the register, once we have that information we may be able
10402 to handle some of them more efficiently. */
10403 if (can_create_pseudo_p ()
10404 && register_operand (op0, mode)
10405 && (CONSTANT_P (op1)
10406 || (GET_CODE (op1) == SUBREG
10407 && CONSTANT_P (SUBREG_REG (op1))))
10408 && standard_sse_constant_p (op1) <= 0)
10409 op1 = validize_mem (force_const_mem (mode, op1));
10411 /* TDmode values are passed as TImode on the stack. TImode values
10412 are moved via xmm registers, and moving them to stack can result in
10413 unaligned memory access. Use ix86_expand_vector_move_misalign()
10414 if memory operand is not aligned correctly. */
10415 if (can_create_pseudo_p ()
10416 && (mode == TImode) && !TARGET_64BIT
10417 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10418 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10422 /* ix86_expand_vector_move_misalign() does not like constants ... */
10423 if (CONSTANT_P (op1)
10424 || (GET_CODE (op1) == SUBREG
10425 && CONSTANT_P (SUBREG_REG (op1))))
10426 op1 = validize_mem (force_const_mem (mode, op1));
10428 /* ... nor both arguments in memory. */
10429 if (!register_operand (op0, mode)
10430 && !register_operand (op1, mode))
10431 op1 = force_reg (mode, op1);
10433 tmp[0] = op0; tmp[1] = op1;
10434 ix86_expand_vector_move_misalign (mode, tmp);
10438 /* Make operand1 a register if it isn't already. */
10439 if (can_create_pseudo_p ()
10440 && !register_operand (op0, mode)
10441 && !register_operand (op1, mode))
10443 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10447 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10450 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10451 straight to ix86_expand_vector_move. */
10452 /* Code generation for scalar reg-reg moves of single and double precision data:
10453 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10457 if (x86_sse_partial_reg_dependency == true)
10462 Code generation for scalar loads of double precision data:
10463 if (x86_sse_split_regs == true)
10464 movlpd mem, reg (gas syntax)
10468 Code generation for unaligned packed loads of single precision data
10469 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10470 if (x86_sse_unaligned_move_optimal)
10473 if (x86_sse_partial_reg_dependency == true)
10485 Code generation for unaligned packed loads of double precision data
10486 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10487 if (x86_sse_unaligned_move_optimal)
10490 if (x86_sse_split_regs == true)
10503 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10512 /* If we're optimizing for size, movups is the smallest. */
10515 op0 = gen_lowpart (V4SFmode, op0);
10516 op1 = gen_lowpart (V4SFmode, op1);
10517 emit_insn (gen_sse_movups (op0, op1));
10521 /* ??? If we have typed data, then it would appear that using
10522 movdqu is the only way to get unaligned data loaded with
10524 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10526 op0 = gen_lowpart (V16QImode, op0);
10527 op1 = gen_lowpart (V16QImode, op1);
10528 emit_insn (gen_sse2_movdqu (op0, op1));
10532 if (TARGET_SSE2 && mode == V2DFmode)
10536 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10538 op0 = gen_lowpart (V2DFmode, op0);
10539 op1 = gen_lowpart (V2DFmode, op1);
10540 emit_insn (gen_sse2_movupd (op0, op1));
10544 /* When SSE registers are split into halves, we can avoid
10545 writing to the top half twice. */
10546 if (TARGET_SSE_SPLIT_REGS)
10548 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10553 /* ??? Not sure about the best option for the Intel chips.
10554 The following would seem to satisfy; the register is
10555 entirely cleared, breaking the dependency chain. We
10556 then store to the upper half, with a dependency depth
10557 of one. A rumor has it that Intel recommends two movsd
10558 followed by an unpacklpd, but this is unconfirmed. And
10559 given that the dependency depth of the unpacklpd would
10560 still be one, I'm not sure why this would be better. */
10561 zero = CONST0_RTX (V2DFmode);
10564 m = adjust_address (op1, DFmode, 0);
10565 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10566 m = adjust_address (op1, DFmode, 8);
10567 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10571 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10573 op0 = gen_lowpart (V4SFmode, op0);
10574 op1 = gen_lowpart (V4SFmode, op1);
10575 emit_insn (gen_sse_movups (op0, op1));
10579 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10580 emit_move_insn (op0, CONST0_RTX (mode));
10582 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10584 if (mode != V4SFmode)
10585 op0 = gen_lowpart (V4SFmode, op0);
10586 m = adjust_address (op1, V2SFmode, 0);
10587 emit_insn (gen_sse_loadlps (op0, op0, m));
10588 m = adjust_address (op1, V2SFmode, 8);
10589 emit_insn (gen_sse_loadhps (op0, op0, m));
10592 else if (MEM_P (op0))
10594 /* If we're optimizing for size, movups is the smallest. */
10597 op0 = gen_lowpart (V4SFmode, op0);
10598 op1 = gen_lowpart (V4SFmode, op1);
10599 emit_insn (gen_sse_movups (op0, op1));
10603 /* ??? Similar to above, only less clear because of quote
10604 typeless stores unquote. */
10605 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10606 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10608 op0 = gen_lowpart (V16QImode, op0);
10609 op1 = gen_lowpart (V16QImode, op1);
10610 emit_insn (gen_sse2_movdqu (op0, op1));
10614 if (TARGET_SSE2 && mode == V2DFmode)
10616 m = adjust_address (op0, DFmode, 0);
10617 emit_insn (gen_sse2_storelpd (m, op1));
10618 m = adjust_address (op0, DFmode, 8);
10619 emit_insn (gen_sse2_storehpd (m, op1));
10623 if (mode != V4SFmode)
10624 op1 = gen_lowpart (V4SFmode, op1);
10625 m = adjust_address (op0, V2SFmode, 0);
10626 emit_insn (gen_sse_storelps (m, op1));
10627 m = adjust_address (op0, V2SFmode, 8);
10628 emit_insn (gen_sse_storehps (m, op1));
10632 gcc_unreachable ();
10635 /* Expand a push in MODE. This is some mode for which we do not support
10636 proper push instructions, at least from the registers that we expect
10637 the value to live in. */
10640 ix86_expand_push (enum machine_mode mode, rtx x)
10644 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10645 GEN_INT (-GET_MODE_SIZE (mode)),
10646 stack_pointer_rtx, 1, OPTAB_DIRECT);
10647 if (tmp != stack_pointer_rtx)
10648 emit_move_insn (stack_pointer_rtx, tmp);
10650 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10651 emit_move_insn (tmp, x);
10654 /* Helper function of ix86_fixup_binary_operands to canonicalize
10655 operand order. Returns true if the operands should be swapped. */
10658 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10661 rtx dst = operands[0];
10662 rtx src1 = operands[1];
10663 rtx src2 = operands[2];
10665 /* If the operation is not commutative, we can't do anything. */
10666 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10669 /* Highest priority is that src1 should match dst. */
10670 if (rtx_equal_p (dst, src1))
10672 if (rtx_equal_p (dst, src2))
10675 /* Next highest priority is that immediate constants come second. */
10676 if (immediate_operand (src2, mode))
10678 if (immediate_operand (src1, mode))
10681 /* Lowest priority is that memory references should come second. */
10691 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10692 destination to use for the operation. If different from the true
10693 destination in operands[0], a copy operation will be required. */
10696 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10699 rtx dst = operands[0];
10700 rtx src1 = operands[1];
10701 rtx src2 = operands[2];
10703 /* Canonicalize operand order. */
10704 if (ix86_swap_binary_operands_p (code, mode, operands))
10708 /* It is invalid to swap operands of different modes. */
10709 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10716 /* Both source operands cannot be in memory. */
10717 if (MEM_P (src1) && MEM_P (src2))
10719 /* Optimization: Only read from memory once. */
10720 if (rtx_equal_p (src1, src2))
10722 src2 = force_reg (mode, src2);
10726 src2 = force_reg (mode, src2);
10729 /* If the destination is memory, and we do not have matching source
10730 operands, do things in registers. */
10731 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10732 dst = gen_reg_rtx (mode);
10734 /* Source 1 cannot be a constant. */
10735 if (CONSTANT_P (src1))
10736 src1 = force_reg (mode, src1);
10738 /* Source 1 cannot be a non-matching memory. */
10739 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10740 src1 = force_reg (mode, src1);
10742 operands[1] = src1;
10743 operands[2] = src2;
10747 /* Similarly, but assume that the destination has already been
10748 set up properly. */
10751 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10752 enum machine_mode mode, rtx operands[])
10754 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10755 gcc_assert (dst == operands[0]);
10758 /* Attempt to expand a binary operator. Make the expansion closer to the
10759 actual machine, then just general_operand, which will allow 3 separate
10760 memory references (one output, two input) in a single insn. */
10763 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10766 rtx src1, src2, dst, op, clob;
10768 dst = ix86_fixup_binary_operands (code, mode, operands);
10769 src1 = operands[1];
10770 src2 = operands[2];
10772 /* Emit the instruction. */
10774 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10775 if (reload_in_progress)
10777 /* Reload doesn't know about the flags register, and doesn't know that
10778 it doesn't want to clobber it. We can only do this with PLUS. */
10779 gcc_assert (code == PLUS);
10784 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10785 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10788 /* Fix up the destination if needed. */
10789 if (dst != operands[0])
10790 emit_move_insn (operands[0], dst);
10793 /* Return TRUE or FALSE depending on whether the binary operator meets the
10794 appropriate constraints. */
10797 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10800 rtx dst = operands[0];
10801 rtx src1 = operands[1];
10802 rtx src2 = operands[2];
10804 /* Both source operands cannot be in memory. */
10805 if (MEM_P (src1) && MEM_P (src2))
10808 /* Canonicalize operand order for commutative operators. */
10809 if (ix86_swap_binary_operands_p (code, mode, operands))
10816 /* If the destination is memory, we must have a matching source operand. */
10817 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10820 /* Source 1 cannot be a constant. */
10821 if (CONSTANT_P (src1))
10824 /* Source 1 cannot be a non-matching memory. */
10825 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10831 /* Attempt to expand a unary operator. Make the expansion closer to the
10832 actual machine, then just general_operand, which will allow 2 separate
10833 memory references (one output, one input) in a single insn. */
10836 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10839 int matching_memory;
10840 rtx src, dst, op, clob;
10845 /* If the destination is memory, and we do not have matching source
10846 operands, do things in registers. */
10847 matching_memory = 0;
10850 if (rtx_equal_p (dst, src))
10851 matching_memory = 1;
10853 dst = gen_reg_rtx (mode);
10856 /* When source operand is memory, destination must match. */
10857 if (MEM_P (src) && !matching_memory)
10858 src = force_reg (mode, src);
10860 /* Emit the instruction. */
10862 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10863 if (reload_in_progress || code == NOT)
10865 /* Reload doesn't know about the flags register, and doesn't know that
10866 it doesn't want to clobber it. */
10867 gcc_assert (code == NOT);
10872 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10873 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10876 /* Fix up the destination if needed. */
10877 if (dst != operands[0])
10878 emit_move_insn (operands[0], dst);
10881 /* Return TRUE or FALSE depending on whether the unary operator meets the
10882 appropriate constraints. */
10885 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10886 enum machine_mode mode ATTRIBUTE_UNUSED,
10887 rtx operands[2] ATTRIBUTE_UNUSED)
10889 /* If one of operands is memory, source and destination must match. */
10890 if ((MEM_P (operands[0])
10891 || MEM_P (operands[1]))
10892 && ! rtx_equal_p (operands[0], operands[1]))
10897 /* Post-reload splitter for converting an SF or DFmode value in an
10898 SSE register into an unsigned SImode. */
10901 ix86_split_convert_uns_si_sse (rtx operands[])
10903 enum machine_mode vecmode;
10904 rtx value, large, zero_or_two31, input, two31, x;
10906 large = operands[1];
10907 zero_or_two31 = operands[2];
10908 input = operands[3];
10909 two31 = operands[4];
10910 vecmode = GET_MODE (large);
10911 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10913 /* Load up the value into the low element. We must ensure that the other
10914 elements are valid floats -- zero is the easiest such value. */
10917 if (vecmode == V4SFmode)
10918 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10920 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10924 input = gen_rtx_REG (vecmode, REGNO (input));
10925 emit_move_insn (value, CONST0_RTX (vecmode));
10926 if (vecmode == V4SFmode)
10927 emit_insn (gen_sse_movss (value, value, input));
10929 emit_insn (gen_sse2_movsd (value, value, input));
10932 emit_move_insn (large, two31);
10933 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10935 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10936 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10938 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10939 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10941 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10942 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10944 large = gen_rtx_REG (V4SImode, REGNO (large));
10945 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10947 x = gen_rtx_REG (V4SImode, REGNO (value));
10948 if (vecmode == V4SFmode)
10949 emit_insn (gen_sse2_cvttps2dq (x, value));
10951 emit_insn (gen_sse2_cvttpd2dq (x, value));
10954 emit_insn (gen_xorv4si3 (value, value, large));
10957 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10958 Expects the 64-bit DImode to be supplied in a pair of integral
10959 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10960 -mfpmath=sse, !optimize_size only. */
10963 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10965 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10966 rtx int_xmm, fp_xmm;
10967 rtx biases, exponents;
10970 int_xmm = gen_reg_rtx (V4SImode);
10971 if (TARGET_INTER_UNIT_MOVES)
10972 emit_insn (gen_movdi_to_sse (int_xmm, input));
10973 else if (TARGET_SSE_SPLIT_REGS)
10975 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10976 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10980 x = gen_reg_rtx (V2DImode);
10981 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10982 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10985 x = gen_rtx_CONST_VECTOR (V4SImode,
10986 gen_rtvec (4, GEN_INT (0x43300000UL),
10987 GEN_INT (0x45300000UL),
10988 const0_rtx, const0_rtx));
10989 exponents = validize_mem (force_const_mem (V4SImode, x));
10991 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10992 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10994 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10995 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10996 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10997 (0x1.0p84 + double(fp_value_hi_xmm)).
10998 Note these exponents differ by 32. */
11000 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11002 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11003 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11004 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11005 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11006 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11007 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11008 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11009 biases = validize_mem (force_const_mem (V2DFmode, biases));
11010 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11012 /* Add the upper and lower DFmode values together. */
11014 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11017 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11018 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11019 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
11022 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11025 /* Not used, but eases macroization of patterns. */
11027 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11028 rtx input ATTRIBUTE_UNUSED)
11030 gcc_unreachable ();
11033 /* Convert an unsigned SImode value into a DFmode. Only currently used
11034 for SSE, but applicable anywhere. */
11037 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11039 REAL_VALUE_TYPE TWO31r;
11042 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11043 NULL, 1, OPTAB_DIRECT);
11045 fp = gen_reg_rtx (DFmode);
11046 emit_insn (gen_floatsidf2 (fp, x));
11048 real_ldexp (&TWO31r, &dconst1, 31);
11049 x = const_double_from_real_value (TWO31r, DFmode);
11051 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11053 emit_move_insn (target, x);
11056 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11057 32-bit mode; otherwise we have a direct convert instruction. */
11060 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11062 REAL_VALUE_TYPE TWO32r;
11063 rtx fp_lo, fp_hi, x;
11065 fp_lo = gen_reg_rtx (DFmode);
11066 fp_hi = gen_reg_rtx (DFmode);
11068 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11070 real_ldexp (&TWO32r, &dconst1, 32);
11071 x = const_double_from_real_value (TWO32r, DFmode);
11072 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11074 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11076 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11079 emit_move_insn (target, x);
11082 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11083 For x86_32, -mfpmath=sse, !optimize_size only. */
11085 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11087 REAL_VALUE_TYPE ONE16r;
11088 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11090 real_ldexp (&ONE16r, &dconst1, 16);
11091 x = const_double_from_real_value (ONE16r, SFmode);
11092 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11093 NULL, 0, OPTAB_DIRECT);
11094 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11095 NULL, 0, OPTAB_DIRECT);
11096 fp_hi = gen_reg_rtx (SFmode);
11097 fp_lo = gen_reg_rtx (SFmode);
11098 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11099 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11100 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11102 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11104 if (!rtx_equal_p (target, fp_hi))
11105 emit_move_insn (target, fp_hi);
11108 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11109 then replicate the value for all elements of the vector
11113 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11120 v = gen_rtvec (4, value, value, value, value);
11121 return gen_rtx_CONST_VECTOR (V4SImode, v);
11125 v = gen_rtvec (2, value, value);
11126 return gen_rtx_CONST_VECTOR (V2DImode, v);
11130 v = gen_rtvec (4, value, value, value, value);
11132 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11133 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11134 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11138 v = gen_rtvec (2, value, value);
11140 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11141 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11144 gcc_unreachable ();
11148 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11149 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11150 for an SSE register. If VECT is true, then replicate the mask for
11151 all elements of the vector register. If INVERT is true, then create
11152 a mask excluding the sign bit. */
11155 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11157 enum machine_mode vec_mode, imode;
11158 HOST_WIDE_INT hi, lo;
11163 /* Find the sign bit, sign extended to 2*HWI. */
11169 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11170 lo = 0x80000000, hi = lo < 0;
11176 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11177 if (HOST_BITS_PER_WIDE_INT >= 64)
11178 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11180 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11186 vec_mode = VOIDmode;
11187 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11188 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11192 gcc_unreachable ();
11196 lo = ~lo, hi = ~hi;
11198 /* Force this value into the low part of a fp vector constant. */
11199 mask = immed_double_const (lo, hi, imode);
11200 mask = gen_lowpart (mode, mask);
11202 if (vec_mode == VOIDmode)
11203 return force_reg (mode, mask);
11205 v = ix86_build_const_vector (mode, vect, mask);
11206 return force_reg (vec_mode, v);
11209 /* Generate code for floating point ABS or NEG. */
11212 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11215 rtx mask, set, use, clob, dst, src;
11216 bool use_sse = false;
11217 bool vector_mode = VECTOR_MODE_P (mode);
11218 enum machine_mode elt_mode = mode;
11222 elt_mode = GET_MODE_INNER (mode);
11225 else if (mode == TFmode)
11227 else if (TARGET_SSE_MATH)
11228 use_sse = SSE_FLOAT_MODE_P (mode);
11230 /* NEG and ABS performed with SSE use bitwise mask operations.
11231 Create the appropriate mask now. */
11233 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11242 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11243 set = gen_rtx_SET (VOIDmode, dst, set);
11248 set = gen_rtx_fmt_e (code, mode, src);
11249 set = gen_rtx_SET (VOIDmode, dst, set);
11252 use = gen_rtx_USE (VOIDmode, mask);
11253 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11254 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11255 gen_rtvec (3, set, use, clob)));
11262 /* Expand a copysign operation. Special case operand 0 being a constant. */
11265 ix86_expand_copysign (rtx operands[])
11267 enum machine_mode mode;
11268 rtx dest, op0, op1, mask, nmask;
11270 dest = operands[0];
11274 mode = GET_MODE (dest);
11276 if (GET_CODE (op0) == CONST_DOUBLE)
11278 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11280 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11281 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11283 if (mode == SFmode || mode == DFmode)
11285 enum machine_mode vmode;
11287 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11289 if (op0 == CONST0_RTX (mode))
11290 op0 = CONST0_RTX (vmode);
11295 if (mode == SFmode)
11296 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11297 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11299 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11301 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11304 else if (op0 != CONST0_RTX (mode))
11305 op0 = force_reg (mode, op0);
11307 mask = ix86_build_signbit_mask (mode, 0, 0);
11309 if (mode == SFmode)
11310 copysign_insn = gen_copysignsf3_const;
11311 else if (mode == DFmode)
11312 copysign_insn = gen_copysigndf3_const;
11314 copysign_insn = gen_copysigntf3_const;
11316 emit_insn (copysign_insn (dest, op0, op1, mask));
11320 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11322 nmask = ix86_build_signbit_mask (mode, 0, 1);
11323 mask = ix86_build_signbit_mask (mode, 0, 0);
11325 if (mode == SFmode)
11326 copysign_insn = gen_copysignsf3_var;
11327 else if (mode == DFmode)
11328 copysign_insn = gen_copysigndf3_var;
11330 copysign_insn = gen_copysigntf3_var;
11332 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11336 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11337 be a constant, and so has already been expanded into a vector constant. */
11340 ix86_split_copysign_const (rtx operands[])
11342 enum machine_mode mode, vmode;
11343 rtx dest, op0, op1, mask, x;
11345 dest = operands[0];
11348 mask = operands[3];
11350 mode = GET_MODE (dest);
11351 vmode = GET_MODE (mask);
11353 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11354 x = gen_rtx_AND (vmode, dest, mask);
11355 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11357 if (op0 != CONST0_RTX (vmode))
11359 x = gen_rtx_IOR (vmode, dest, op0);
11360 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11364 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11365 so we have to do two masks. */
11368 ix86_split_copysign_var (rtx operands[])
11370 enum machine_mode mode, vmode;
11371 rtx dest, scratch, op0, op1, mask, nmask, x;
11373 dest = operands[0];
11374 scratch = operands[1];
11377 nmask = operands[4];
11378 mask = operands[5];
11380 mode = GET_MODE (dest);
11381 vmode = GET_MODE (mask);
11383 if (rtx_equal_p (op0, op1))
11385 /* Shouldn't happen often (it's useless, obviously), but when it does
11386 we'd generate incorrect code if we continue below. */
11387 emit_move_insn (dest, op0);
11391 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11393 gcc_assert (REGNO (op1) == REGNO (scratch));
11395 x = gen_rtx_AND (vmode, scratch, mask);
11396 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11399 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11400 x = gen_rtx_NOT (vmode, dest);
11401 x = gen_rtx_AND (vmode, x, op0);
11402 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11406 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11408 x = gen_rtx_AND (vmode, scratch, mask);
11410 else /* alternative 2,4 */
11412 gcc_assert (REGNO (mask) == REGNO (scratch));
11413 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11414 x = gen_rtx_AND (vmode, scratch, op1);
11416 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11418 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11420 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11421 x = gen_rtx_AND (vmode, dest, nmask);
11423 else /* alternative 3,4 */
11425 gcc_assert (REGNO (nmask) == REGNO (dest));
11427 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11428 x = gen_rtx_AND (vmode, dest, op0);
11430 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11433 x = gen_rtx_IOR (vmode, dest, scratch);
11434 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11437 /* Return TRUE or FALSE depending on whether the first SET in INSN
11438 has source and destination with matching CC modes, and that the
11439 CC mode is at least as constrained as REQ_MODE. */
11442 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11445 enum machine_mode set_mode;
11447 set = PATTERN (insn);
11448 if (GET_CODE (set) == PARALLEL)
11449 set = XVECEXP (set, 0, 0);
11450 gcc_assert (GET_CODE (set) == SET);
11451 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11453 set_mode = GET_MODE (SET_DEST (set));
11457 if (req_mode != CCNOmode
11458 && (req_mode != CCmode
11459 || XEXP (SET_SRC (set), 1) != const0_rtx))
11463 if (req_mode == CCGCmode)
11467 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11471 if (req_mode == CCZmode)
11478 gcc_unreachable ();
11481 return (GET_MODE (SET_SRC (set)) == set_mode);
11484 /* Generate insn patterns to do an integer compare of OPERANDS. */
11487 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11489 enum machine_mode cmpmode;
11492 cmpmode = SELECT_CC_MODE (code, op0, op1);
11493 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11495 /* This is very simple, but making the interface the same as in the
11496 FP case makes the rest of the code easier. */
11497 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11498 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11500 /* Return the test that should be put into the flags user, i.e.
11501 the bcc, scc, or cmov instruction. */
11502 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11505 /* Figure out whether to use ordered or unordered fp comparisons.
11506 Return the appropriate mode to use. */
11509 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11511 /* ??? In order to make all comparisons reversible, we do all comparisons
11512 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11513 all forms trapping and nontrapping comparisons, we can make inequality
11514 comparisons trapping again, since it results in better code when using
11515 FCOM based compares. */
11516 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11520 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11522 enum machine_mode mode = GET_MODE (op0);
11524 if (SCALAR_FLOAT_MODE_P (mode))
11526 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11527 return ix86_fp_compare_mode (code);
11532 /* Only zero flag is needed. */
11533 case EQ: /* ZF=0 */
11534 case NE: /* ZF!=0 */
11536 /* Codes needing carry flag. */
11537 case GEU: /* CF=0 */
11538 case LTU: /* CF=1 */
11539 /* Detect overflow checks. They need just the carry flag. */
11540 if (GET_CODE (op0) == PLUS
11541 && rtx_equal_p (op1, XEXP (op0, 0)))
11545 case GTU: /* CF=0 & ZF=0 */
11546 case LEU: /* CF=1 | ZF=1 */
11547 /* Detect overflow checks. They need just the carry flag. */
11548 if (GET_CODE (op0) == MINUS
11549 && rtx_equal_p (op1, XEXP (op0, 0)))
11553 /* Codes possibly doable only with sign flag when
11554 comparing against zero. */
11555 case GE: /* SF=OF or SF=0 */
11556 case LT: /* SF<>OF or SF=1 */
11557 if (op1 == const0_rtx)
11560 /* For other cases Carry flag is not required. */
11562 /* Codes doable only with sign flag when comparing
11563 against zero, but we miss jump instruction for it
11564 so we need to use relational tests against overflow
11565 that thus needs to be zero. */
11566 case GT: /* ZF=0 & SF=OF */
11567 case LE: /* ZF=1 | SF<>OF */
11568 if (op1 == const0_rtx)
11572 /* strcmp pattern do (use flags) and combine may ask us for proper
11577 gcc_unreachable ();
11581 /* Return the fixed registers used for condition codes. */
11584 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11591 /* If two condition code modes are compatible, return a condition code
11592 mode which is compatible with both. Otherwise, return
11595 static enum machine_mode
11596 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11601 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11604 if ((m1 == CCGCmode && m2 == CCGOCmode)
11605 || (m1 == CCGOCmode && m2 == CCGCmode))
11611 gcc_unreachable ();
11641 /* These are only compatible with themselves, which we already
11647 /* Split comparison code CODE into comparisons we can do using branch
11648 instructions. BYPASS_CODE is comparison code for branch that will
11649 branch around FIRST_CODE and SECOND_CODE. If some of branches
11650 is not required, set value to UNKNOWN.
11651 We never require more than two branches. */
11654 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11655 enum rtx_code *first_code,
11656 enum rtx_code *second_code)
11658 *first_code = code;
11659 *bypass_code = UNKNOWN;
11660 *second_code = UNKNOWN;
11662 /* The fcomi comparison sets flags as follows:
11672 case GT: /* GTU - CF=0 & ZF=0 */
11673 case GE: /* GEU - CF=0 */
11674 case ORDERED: /* PF=0 */
11675 case UNORDERED: /* PF=1 */
11676 case UNEQ: /* EQ - ZF=1 */
11677 case UNLT: /* LTU - CF=1 */
11678 case UNLE: /* LEU - CF=1 | ZF=1 */
11679 case LTGT: /* EQ - ZF=0 */
11681 case LT: /* LTU - CF=1 - fails on unordered */
11682 *first_code = UNLT;
11683 *bypass_code = UNORDERED;
11685 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11686 *first_code = UNLE;
11687 *bypass_code = UNORDERED;
11689 case EQ: /* EQ - ZF=1 - fails on unordered */
11690 *first_code = UNEQ;
11691 *bypass_code = UNORDERED;
11693 case NE: /* NE - ZF=0 - fails on unordered */
11694 *first_code = LTGT;
11695 *second_code = UNORDERED;
11697 case UNGE: /* GEU - CF=0 - fails on unordered */
11699 *second_code = UNORDERED;
11701 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11703 *second_code = UNORDERED;
11706 gcc_unreachable ();
11708 if (!TARGET_IEEE_FP)
11710 *second_code = UNKNOWN;
11711 *bypass_code = UNKNOWN;
11715 /* Return cost of comparison done fcom + arithmetics operations on AX.
11716 All following functions do use number of instructions as a cost metrics.
11717 In future this should be tweaked to compute bytes for optimize_size and
11718 take into account performance of various instructions on various CPUs. */
11720 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11722 if (!TARGET_IEEE_FP)
11724 /* The cost of code output by ix86_expand_fp_compare. */
11748 gcc_unreachable ();
11752 /* Return cost of comparison done using fcomi operation.
11753 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11755 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11757 enum rtx_code bypass_code, first_code, second_code;
11758 /* Return arbitrarily high cost when instruction is not supported - this
11759 prevents gcc from using it. */
11762 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11763 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11766 /* Return cost of comparison done using sahf operation.
11767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11769 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11771 enum rtx_code bypass_code, first_code, second_code;
11772 /* Return arbitrarily high cost when instruction is not preferred - this
11773 avoids gcc from using it. */
11774 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11776 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11777 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11780 /* Compute cost of the comparison done using any method.
11781 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11783 ix86_fp_comparison_cost (enum rtx_code code)
11785 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11788 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11789 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11791 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11792 if (min > sahf_cost)
11794 if (min > fcomi_cost)
11799 /* Return true if we should use an FCOMI instruction for this
11803 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11805 enum rtx_code swapped_code = swap_condition (code);
11807 return ((ix86_fp_comparison_cost (code)
11808 == ix86_fp_comparison_fcomi_cost (code))
11809 || (ix86_fp_comparison_cost (swapped_code)
11810 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11813 /* Swap, force into registers, or otherwise massage the two operands
11814 to a fp comparison. The operands are updated in place; the new
11815 comparison code is returned. */
11817 static enum rtx_code
11818 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11820 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11821 rtx op0 = *pop0, op1 = *pop1;
11822 enum machine_mode op_mode = GET_MODE (op0);
11823 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11825 /* All of the unordered compare instructions only work on registers.
11826 The same is true of the fcomi compare instructions. The XFmode
11827 compare instructions require registers except when comparing
11828 against zero or when converting operand 1 from fixed point to
11832 && (fpcmp_mode == CCFPUmode
11833 || (op_mode == XFmode
11834 && ! (standard_80387_constant_p (op0) == 1
11835 || standard_80387_constant_p (op1) == 1)
11836 && GET_CODE (op1) != FLOAT)
11837 || ix86_use_fcomi_compare (code)))
11839 op0 = force_reg (op_mode, op0);
11840 op1 = force_reg (op_mode, op1);
11844 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11845 things around if they appear profitable, otherwise force op0
11846 into a register. */
11848 if (standard_80387_constant_p (op0) == 0
11850 && ! (standard_80387_constant_p (op1) == 0
11854 tmp = op0, op0 = op1, op1 = tmp;
11855 code = swap_condition (code);
11859 op0 = force_reg (op_mode, op0);
11861 if (CONSTANT_P (op1))
11863 int tmp = standard_80387_constant_p (op1);
11865 op1 = validize_mem (force_const_mem (op_mode, op1));
11869 op1 = force_reg (op_mode, op1);
11872 op1 = force_reg (op_mode, op1);
11876 /* Try to rearrange the comparison to make it cheaper. */
11877 if (ix86_fp_comparison_cost (code)
11878 > ix86_fp_comparison_cost (swap_condition (code))
11879 && (REG_P (op1) || can_create_pseudo_p ()))
11882 tmp = op0, op0 = op1, op1 = tmp;
11883 code = swap_condition (code);
11885 op0 = force_reg (op_mode, op0);
11893 /* Convert comparison codes we use to represent FP comparison to integer
11894 code that will result in proper branch. Return UNKNOWN if no such code
11898 ix86_fp_compare_code_to_integer (enum rtx_code code)
11927 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11930 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11931 rtx *second_test, rtx *bypass_test)
11933 enum machine_mode fpcmp_mode, intcmp_mode;
11935 int cost = ix86_fp_comparison_cost (code);
11936 enum rtx_code bypass_code, first_code, second_code;
11938 fpcmp_mode = ix86_fp_compare_mode (code);
11939 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11942 *second_test = NULL_RTX;
11944 *bypass_test = NULL_RTX;
11946 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11948 /* Do fcomi/sahf based test when profitable. */
11949 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11950 && (bypass_code == UNKNOWN || bypass_test)
11951 && (second_code == UNKNOWN || second_test))
11953 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11954 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11960 gcc_assert (TARGET_SAHF);
11963 scratch = gen_reg_rtx (HImode);
11964 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11966 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11969 /* The FP codes work out to act like unsigned. */
11970 intcmp_mode = fpcmp_mode;
11972 if (bypass_code != UNKNOWN)
11973 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11974 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11976 if (second_code != UNKNOWN)
11977 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11978 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11983 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11984 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11985 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11987 scratch = gen_reg_rtx (HImode);
11988 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11990 /* In the unordered case, we have to check C2 for NaN's, which
11991 doesn't happen to work out to anything nice combination-wise.
11992 So do some bit twiddling on the value we've got in AH to come
11993 up with an appropriate set of condition codes. */
11995 intcmp_mode = CCNOmode;
12000 if (code == GT || !TARGET_IEEE_FP)
12002 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12007 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12008 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12009 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12010 intcmp_mode = CCmode;
12016 if (code == LT && TARGET_IEEE_FP)
12018 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12019 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12020 intcmp_mode = CCmode;
12025 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12031 if (code == GE || !TARGET_IEEE_FP)
12033 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12038 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12039 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12046 if (code == LE && TARGET_IEEE_FP)
12048 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12049 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12050 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12051 intcmp_mode = CCmode;
12056 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12062 if (code == EQ && TARGET_IEEE_FP)
12064 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12065 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12066 intcmp_mode = CCmode;
12071 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12078 if (code == NE && TARGET_IEEE_FP)
12080 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12081 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12087 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12093 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12097 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12102 gcc_unreachable ();
12106 /* Return the test that should be put into the flags user, i.e.
12107 the bcc, scc, or cmov instruction. */
12108 return gen_rtx_fmt_ee (code, VOIDmode,
12109 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12114 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12117 op0 = ix86_compare_op0;
12118 op1 = ix86_compare_op1;
12121 *second_test = NULL_RTX;
12123 *bypass_test = NULL_RTX;
12125 if (ix86_compare_emitted)
12127 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12128 ix86_compare_emitted = NULL_RTX;
12130 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12132 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12133 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12134 second_test, bypass_test);
12137 ret = ix86_expand_int_compare (code, op0, op1);
12142 /* Return true if the CODE will result in nontrivial jump sequence. */
12144 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12146 enum rtx_code bypass_code, first_code, second_code;
12149 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12150 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12154 ix86_expand_branch (enum rtx_code code, rtx label)
12158 /* If we have emitted a compare insn, go straight to simple.
12159 ix86_expand_compare won't emit anything if ix86_compare_emitted
12161 if (ix86_compare_emitted)
12164 switch (GET_MODE (ix86_compare_op0))
12170 tmp = ix86_expand_compare (code, NULL, NULL);
12171 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12172 gen_rtx_LABEL_REF (VOIDmode, label),
12174 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12183 enum rtx_code bypass_code, first_code, second_code;
12185 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12186 &ix86_compare_op1);
12188 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12190 /* Check whether we will use the natural sequence with one jump. If
12191 so, we can expand jump early. Otherwise delay expansion by
12192 creating compound insn to not confuse optimizers. */
12193 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12195 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12196 gen_rtx_LABEL_REF (VOIDmode, label),
12197 pc_rtx, NULL_RTX, NULL_RTX);
12201 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12202 ix86_compare_op0, ix86_compare_op1);
12203 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12204 gen_rtx_LABEL_REF (VOIDmode, label),
12206 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12208 use_fcomi = ix86_use_fcomi_compare (code);
12209 vec = rtvec_alloc (3 + !use_fcomi);
12210 RTVEC_ELT (vec, 0) = tmp;
12212 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12214 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12217 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12219 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12228 /* Expand DImode branch into multiple compare+branch. */
12230 rtx lo[2], hi[2], label2;
12231 enum rtx_code code1, code2, code3;
12232 enum machine_mode submode;
12234 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12236 tmp = ix86_compare_op0;
12237 ix86_compare_op0 = ix86_compare_op1;
12238 ix86_compare_op1 = tmp;
12239 code = swap_condition (code);
12241 if (GET_MODE (ix86_compare_op0) == DImode)
12243 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12244 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12249 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12250 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12254 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12255 avoid two branches. This costs one extra insn, so disable when
12256 optimizing for size. */
12258 if ((code == EQ || code == NE)
12260 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12265 if (hi[1] != const0_rtx)
12266 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12267 NULL_RTX, 0, OPTAB_WIDEN);
12270 if (lo[1] != const0_rtx)
12271 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12272 NULL_RTX, 0, OPTAB_WIDEN);
12274 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12275 NULL_RTX, 0, OPTAB_WIDEN);
12277 ix86_compare_op0 = tmp;
12278 ix86_compare_op1 = const0_rtx;
12279 ix86_expand_branch (code, label);
12283 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12284 op1 is a constant and the low word is zero, then we can just
12285 examine the high word. Similarly for low word -1 and
12286 less-or-equal-than or greater-than. */
12288 if (CONST_INT_P (hi[1]))
12291 case LT: case LTU: case GE: case GEU:
12292 if (lo[1] == const0_rtx)
12294 ix86_compare_op0 = hi[0];
12295 ix86_compare_op1 = hi[1];
12296 ix86_expand_branch (code, label);
12300 case LE: case LEU: case GT: case GTU:
12301 if (lo[1] == constm1_rtx)
12303 ix86_compare_op0 = hi[0];
12304 ix86_compare_op1 = hi[1];
12305 ix86_expand_branch (code, label);
12313 /* Otherwise, we need two or three jumps. */
12315 label2 = gen_label_rtx ();
12318 code2 = swap_condition (code);
12319 code3 = unsigned_condition (code);
12323 case LT: case GT: case LTU: case GTU:
12326 case LE: code1 = LT; code2 = GT; break;
12327 case GE: code1 = GT; code2 = LT; break;
12328 case LEU: code1 = LTU; code2 = GTU; break;
12329 case GEU: code1 = GTU; code2 = LTU; break;
12331 case EQ: code1 = UNKNOWN; code2 = NE; break;
12332 case NE: code2 = UNKNOWN; break;
12335 gcc_unreachable ();
12340 * if (hi(a) < hi(b)) goto true;
12341 * if (hi(a) > hi(b)) goto false;
12342 * if (lo(a) < lo(b)) goto true;
12346 ix86_compare_op0 = hi[0];
12347 ix86_compare_op1 = hi[1];
12349 if (code1 != UNKNOWN)
12350 ix86_expand_branch (code1, label);
12351 if (code2 != UNKNOWN)
12352 ix86_expand_branch (code2, label2);
12354 ix86_compare_op0 = lo[0];
12355 ix86_compare_op1 = lo[1];
12356 ix86_expand_branch (code3, label);
12358 if (code2 != UNKNOWN)
12359 emit_label (label2);
12364 gcc_unreachable ();
12368 /* Split branch based on floating point condition. */
12370 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12371 rtx target1, rtx target2, rtx tmp, rtx pushed)
12373 rtx second, bypass;
12374 rtx label = NULL_RTX;
12376 int bypass_probability = -1, second_probability = -1, probability = -1;
12379 if (target2 != pc_rtx)
12382 code = reverse_condition_maybe_unordered (code);
12387 condition = ix86_expand_fp_compare (code, op1, op2,
12388 tmp, &second, &bypass);
12390 /* Remove pushed operand from stack. */
12392 ix86_free_from_memory (GET_MODE (pushed));
12394 if (split_branch_probability >= 0)
12396 /* Distribute the probabilities across the jumps.
12397 Assume the BYPASS and SECOND to be always test
12399 probability = split_branch_probability;
12401 /* Value of 1 is low enough to make no need for probability
12402 to be updated. Later we may run some experiments and see
12403 if unordered values are more frequent in practice. */
12405 bypass_probability = 1;
12407 second_probability = 1;
12409 if (bypass != NULL_RTX)
12411 label = gen_label_rtx ();
12412 i = emit_jump_insn (gen_rtx_SET
12414 gen_rtx_IF_THEN_ELSE (VOIDmode,
12416 gen_rtx_LABEL_REF (VOIDmode,
12419 if (bypass_probability >= 0)
12421 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12422 GEN_INT (bypass_probability),
12425 i = emit_jump_insn (gen_rtx_SET
12427 gen_rtx_IF_THEN_ELSE (VOIDmode,
12428 condition, target1, target2)));
12429 if (probability >= 0)
12431 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12432 GEN_INT (probability),
12434 if (second != NULL_RTX)
12436 i = emit_jump_insn (gen_rtx_SET
12438 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12440 if (second_probability >= 0)
12442 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12443 GEN_INT (second_probability),
12446 if (label != NULL_RTX)
12447 emit_label (label);
12451 ix86_expand_setcc (enum rtx_code code, rtx dest)
12453 rtx ret, tmp, tmpreg, equiv;
12454 rtx second_test, bypass_test;
12456 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12457 return 0; /* FAIL */
12459 gcc_assert (GET_MODE (dest) == QImode);
12461 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12462 PUT_MODE (ret, QImode);
12467 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12468 if (bypass_test || second_test)
12470 rtx test = second_test;
12472 rtx tmp2 = gen_reg_rtx (QImode);
12475 gcc_assert (!second_test);
12476 test = bypass_test;
12478 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12480 PUT_MODE (test, QImode);
12481 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12484 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12486 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12489 /* Attach a REG_EQUAL note describing the comparison result. */
12490 if (ix86_compare_op0 && ix86_compare_op1)
12492 equiv = simplify_gen_relational (code, QImode,
12493 GET_MODE (ix86_compare_op0),
12494 ix86_compare_op0, ix86_compare_op1);
12495 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12498 return 1; /* DONE */
12501 /* Expand comparison setting or clearing carry flag. Return true when
12502 successful and set pop for the operation. */
12504 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12506 enum machine_mode mode =
12507 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12509 /* Do not handle DImode compares that go through special path. */
12510 if (mode == (TARGET_64BIT ? TImode : DImode))
12513 if (SCALAR_FLOAT_MODE_P (mode))
12515 rtx second_test = NULL, bypass_test = NULL;
12516 rtx compare_op, compare_seq;
12518 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12520 /* Shortcut: following common codes never translate
12521 into carry flag compares. */
12522 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12523 || code == ORDERED || code == UNORDERED)
12526 /* These comparisons require zero flag; swap operands so they won't. */
12527 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12528 && !TARGET_IEEE_FP)
12533 code = swap_condition (code);
12536 /* Try to expand the comparison and verify that we end up with
12537 carry flag based comparison. This fails to be true only when
12538 we decide to expand comparison using arithmetic that is not
12539 too common scenario. */
12541 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12542 &second_test, &bypass_test);
12543 compare_seq = get_insns ();
12546 if (second_test || bypass_test)
12549 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12550 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12551 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12553 code = GET_CODE (compare_op);
12555 if (code != LTU && code != GEU)
12558 emit_insn (compare_seq);
12563 if (!INTEGRAL_MODE_P (mode))
12572 /* Convert a==0 into (unsigned)a<1. */
12575 if (op1 != const0_rtx)
12578 code = (code == EQ ? LTU : GEU);
12581 /* Convert a>b into b<a or a>=b-1. */
12584 if (CONST_INT_P (op1))
12586 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12587 /* Bail out on overflow. We still can swap operands but that
12588 would force loading of the constant into register. */
12589 if (op1 == const0_rtx
12590 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12592 code = (code == GTU ? GEU : LTU);
12599 code = (code == GTU ? LTU : GEU);
12603 /* Convert a>=0 into (unsigned)a<0x80000000. */
12606 if (mode == DImode || op1 != const0_rtx)
12608 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12609 code = (code == LT ? GEU : LTU);
12613 if (mode == DImode || op1 != constm1_rtx)
12615 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12616 code = (code == LE ? GEU : LTU);
12622 /* Swapping operands may cause constant to appear as first operand. */
12623 if (!nonimmediate_operand (op0, VOIDmode))
12625 if (!can_create_pseudo_p ())
12627 op0 = force_reg (mode, op0);
12629 ix86_compare_op0 = op0;
12630 ix86_compare_op1 = op1;
12631 *pop = ix86_expand_compare (code, NULL, NULL);
12632 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12637 ix86_expand_int_movcc (rtx operands[])
12639 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12640 rtx compare_seq, compare_op;
12641 rtx second_test, bypass_test;
12642 enum machine_mode mode = GET_MODE (operands[0]);
12643 bool sign_bit_compare_p = false;;
12646 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12647 compare_seq = get_insns ();
12650 compare_code = GET_CODE (compare_op);
12652 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12653 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12654 sign_bit_compare_p = true;
12656 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12657 HImode insns, we'd be swallowed in word prefix ops. */
12659 if ((mode != HImode || TARGET_FAST_PREFIX)
12660 && (mode != (TARGET_64BIT ? TImode : DImode))
12661 && CONST_INT_P (operands[2])
12662 && CONST_INT_P (operands[3]))
12664 rtx out = operands[0];
12665 HOST_WIDE_INT ct = INTVAL (operands[2]);
12666 HOST_WIDE_INT cf = INTVAL (operands[3]);
12667 HOST_WIDE_INT diff;
12670 /* Sign bit compares are better done using shifts than we do by using
12672 if (sign_bit_compare_p
12673 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12674 ix86_compare_op1, &compare_op))
12676 /* Detect overlap between destination and compare sources. */
12679 if (!sign_bit_compare_p)
12681 bool fpcmp = false;
12683 compare_code = GET_CODE (compare_op);
12685 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12686 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12689 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12692 /* To simplify rest of code, restrict to the GEU case. */
12693 if (compare_code == LTU)
12695 HOST_WIDE_INT tmp = ct;
12698 compare_code = reverse_condition (compare_code);
12699 code = reverse_condition (code);
12704 PUT_CODE (compare_op,
12705 reverse_condition_maybe_unordered
12706 (GET_CODE (compare_op)));
12708 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12712 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12713 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12714 tmp = gen_reg_rtx (mode);
12716 if (mode == DImode)
12717 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12719 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12723 if (code == GT || code == GE)
12724 code = reverse_condition (code);
12727 HOST_WIDE_INT tmp = ct;
12732 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12733 ix86_compare_op1, VOIDmode, 0, -1);
12746 tmp = expand_simple_binop (mode, PLUS,
12748 copy_rtx (tmp), 1, OPTAB_DIRECT);
12759 tmp = expand_simple_binop (mode, IOR,
12761 copy_rtx (tmp), 1, OPTAB_DIRECT);
12763 else if (diff == -1 && ct)
12773 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12775 tmp = expand_simple_binop (mode, PLUS,
12776 copy_rtx (tmp), GEN_INT (cf),
12777 copy_rtx (tmp), 1, OPTAB_DIRECT);
12785 * andl cf - ct, dest
12795 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12798 tmp = expand_simple_binop (mode, AND,
12800 gen_int_mode (cf - ct, mode),
12801 copy_rtx (tmp), 1, OPTAB_DIRECT);
12803 tmp = expand_simple_binop (mode, PLUS,
12804 copy_rtx (tmp), GEN_INT (ct),
12805 copy_rtx (tmp), 1, OPTAB_DIRECT);
12808 if (!rtx_equal_p (tmp, out))
12809 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12811 return 1; /* DONE */
12816 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12819 tmp = ct, ct = cf, cf = tmp;
12822 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12824 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12826 /* We may be reversing unordered compare to normal compare, that
12827 is not valid in general (we may convert non-trapping condition
12828 to trapping one), however on i386 we currently emit all
12829 comparisons unordered. */
12830 compare_code = reverse_condition_maybe_unordered (compare_code);
12831 code = reverse_condition_maybe_unordered (code);
12835 compare_code = reverse_condition (compare_code);
12836 code = reverse_condition (code);
12840 compare_code = UNKNOWN;
12841 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12842 && CONST_INT_P (ix86_compare_op1))
12844 if (ix86_compare_op1 == const0_rtx
12845 && (code == LT || code == GE))
12846 compare_code = code;
12847 else if (ix86_compare_op1 == constm1_rtx)
12851 else if (code == GT)
12856 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12857 if (compare_code != UNKNOWN
12858 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12859 && (cf == -1 || ct == -1))
12861 /* If lea code below could be used, only optimize
12862 if it results in a 2 insn sequence. */
12864 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12865 || diff == 3 || diff == 5 || diff == 9)
12866 || (compare_code == LT && ct == -1)
12867 || (compare_code == GE && cf == -1))
12870 * notl op1 (if necessary)
12878 code = reverse_condition (code);
12881 out = emit_store_flag (out, code, ix86_compare_op0,
12882 ix86_compare_op1, VOIDmode, 0, -1);
12884 out = expand_simple_binop (mode, IOR,
12886 out, 1, OPTAB_DIRECT);
12887 if (out != operands[0])
12888 emit_move_insn (operands[0], out);
12890 return 1; /* DONE */
12895 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12896 || diff == 3 || diff == 5 || diff == 9)
12897 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12899 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12905 * lea cf(dest*(ct-cf)),dest
12909 * This also catches the degenerate setcc-only case.
12915 out = emit_store_flag (out, code, ix86_compare_op0,
12916 ix86_compare_op1, VOIDmode, 0, 1);
12919 /* On x86_64 the lea instruction operates on Pmode, so we need
12920 to get arithmetics done in proper mode to match. */
12922 tmp = copy_rtx (out);
12926 out1 = copy_rtx (out);
12927 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12931 tmp = gen_rtx_PLUS (mode, tmp, out1);
12937 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12940 if (!rtx_equal_p (tmp, out))
12943 out = force_operand (tmp, copy_rtx (out));
12945 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12947 if (!rtx_equal_p (out, operands[0]))
12948 emit_move_insn (operands[0], copy_rtx (out));
12950 return 1; /* DONE */
12954 * General case: Jumpful:
12955 * xorl dest,dest cmpl op1, op2
12956 * cmpl op1, op2 movl ct, dest
12957 * setcc dest jcc 1f
12958 * decl dest movl cf, dest
12959 * andl (cf-ct),dest 1:
12962 * Size 20. Size 14.
12964 * This is reasonably steep, but branch mispredict costs are
12965 * high on modern cpus, so consider failing only if optimizing
12969 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12970 && BRANCH_COST >= 2)
12974 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12979 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12981 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12983 /* We may be reversing unordered compare to normal compare,
12984 that is not valid in general (we may convert non-trapping
12985 condition to trapping one), however on i386 we currently
12986 emit all comparisons unordered. */
12987 code = reverse_condition_maybe_unordered (code);
12991 code = reverse_condition (code);
12992 if (compare_code != UNKNOWN)
12993 compare_code = reverse_condition (compare_code);
12997 if (compare_code != UNKNOWN)
12999 /* notl op1 (if needed)
13004 For x < 0 (resp. x <= -1) there will be no notl,
13005 so if possible swap the constants to get rid of the
13007 True/false will be -1/0 while code below (store flag
13008 followed by decrement) is 0/-1, so the constants need
13009 to be exchanged once more. */
13011 if (compare_code == GE || !cf)
13013 code = reverse_condition (code);
13018 HOST_WIDE_INT tmp = cf;
13023 out = emit_store_flag (out, code, ix86_compare_op0,
13024 ix86_compare_op1, VOIDmode, 0, -1);
13028 out = emit_store_flag (out, code, ix86_compare_op0,
13029 ix86_compare_op1, VOIDmode, 0, 1);
13031 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13032 copy_rtx (out), 1, OPTAB_DIRECT);
13035 out = expand_simple_binop (mode, AND, copy_rtx (out),
13036 gen_int_mode (cf - ct, mode),
13037 copy_rtx (out), 1, OPTAB_DIRECT);
13039 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13040 copy_rtx (out), 1, OPTAB_DIRECT);
13041 if (!rtx_equal_p (out, operands[0]))
13042 emit_move_insn (operands[0], copy_rtx (out));
13044 return 1; /* DONE */
13048 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13050 /* Try a few things more with specific constants and a variable. */
13053 rtx var, orig_out, out, tmp;
13055 if (BRANCH_COST <= 2)
13056 return 0; /* FAIL */
13058 /* If one of the two operands is an interesting constant, load a
13059 constant with the above and mask it in with a logical operation. */
13061 if (CONST_INT_P (operands[2]))
13064 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13065 operands[3] = constm1_rtx, op = and_optab;
13066 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13067 operands[3] = const0_rtx, op = ior_optab;
13069 return 0; /* FAIL */
13071 else if (CONST_INT_P (operands[3]))
13074 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13075 operands[2] = constm1_rtx, op = and_optab;
13076 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13077 operands[2] = const0_rtx, op = ior_optab;
13079 return 0; /* FAIL */
13082 return 0; /* FAIL */
13084 orig_out = operands[0];
13085 tmp = gen_reg_rtx (mode);
13088 /* Recurse to get the constant loaded. */
13089 if (ix86_expand_int_movcc (operands) == 0)
13090 return 0; /* FAIL */
13092 /* Mask in the interesting variable. */
13093 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13095 if (!rtx_equal_p (out, orig_out))
13096 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13098 return 1; /* DONE */
13102 * For comparison with above,
13112 if (! nonimmediate_operand (operands[2], mode))
13113 operands[2] = force_reg (mode, operands[2]);
13114 if (! nonimmediate_operand (operands[3], mode))
13115 operands[3] = force_reg (mode, operands[3]);
13117 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13119 rtx tmp = gen_reg_rtx (mode);
13120 emit_move_insn (tmp, operands[3]);
13123 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13125 rtx tmp = gen_reg_rtx (mode);
13126 emit_move_insn (tmp, operands[2]);
13130 if (! register_operand (operands[2], VOIDmode)
13132 || ! register_operand (operands[3], VOIDmode)))
13133 operands[2] = force_reg (mode, operands[2]);
13136 && ! register_operand (operands[3], VOIDmode))
13137 operands[3] = force_reg (mode, operands[3]);
13139 emit_insn (compare_seq);
13140 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13141 gen_rtx_IF_THEN_ELSE (mode,
13142 compare_op, operands[2],
13145 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13146 gen_rtx_IF_THEN_ELSE (mode,
13148 copy_rtx (operands[3]),
13149 copy_rtx (operands[0]))));
13151 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13152 gen_rtx_IF_THEN_ELSE (mode,
13154 copy_rtx (operands[2]),
13155 copy_rtx (operands[0]))));
13157 return 1; /* DONE */
13160 /* Swap, force into registers, or otherwise massage the two operands
13161 to an sse comparison with a mask result. Thus we differ a bit from
13162 ix86_prepare_fp_compare_args which expects to produce a flags result.
13164 The DEST operand exists to help determine whether to commute commutative
13165 operators. The POP0/POP1 operands are updated in place. The new
13166 comparison code is returned, or UNKNOWN if not implementable. */
13168 static enum rtx_code
13169 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13170 rtx *pop0, rtx *pop1)
13178 /* We have no LTGT as an operator. We could implement it with
13179 NE & ORDERED, but this requires an extra temporary. It's
13180 not clear that it's worth it. */
13187 /* These are supported directly. */
13194 /* For commutative operators, try to canonicalize the destination
13195 operand to be first in the comparison - this helps reload to
13196 avoid extra moves. */
13197 if (!dest || !rtx_equal_p (dest, *pop1))
13205 /* These are not supported directly. Swap the comparison operands
13206 to transform into something that is supported. */
13210 code = swap_condition (code);
13214 gcc_unreachable ();
13220 /* Detect conditional moves that exactly match min/max operational
13221 semantics. Note that this is IEEE safe, as long as we don't
13222 interchange the operands.
13224 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13225 and TRUE if the operation is successful and instructions are emitted. */
13228 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13229 rtx cmp_op1, rtx if_true, rtx if_false)
13231 enum machine_mode mode;
13237 else if (code == UNGE)
13240 if_true = if_false;
13246 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13248 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13253 mode = GET_MODE (dest);
13255 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13256 but MODE may be a vector mode and thus not appropriate. */
13257 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13259 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13262 if_true = force_reg (mode, if_true);
13263 v = gen_rtvec (2, if_true, if_false);
13264 tmp = gen_rtx_UNSPEC (mode, v, u);
13268 code = is_min ? SMIN : SMAX;
13269 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13272 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13276 /* Expand an sse vector comparison. Return the register with the result. */
13279 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13280 rtx op_true, rtx op_false)
13282 enum machine_mode mode = GET_MODE (dest);
13285 cmp_op0 = force_reg (mode, cmp_op0);
13286 if (!nonimmediate_operand (cmp_op1, mode))
13287 cmp_op1 = force_reg (mode, cmp_op1);
13290 || reg_overlap_mentioned_p (dest, op_true)
13291 || reg_overlap_mentioned_p (dest, op_false))
13292 dest = gen_reg_rtx (mode);
13294 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13295 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13300 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13301 operations. This is used for both scalar and vector conditional moves. */
13304 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13306 enum machine_mode mode = GET_MODE (dest);
13309 if (op_false == CONST0_RTX (mode))
13311 op_true = force_reg (mode, op_true);
13312 x = gen_rtx_AND (mode, cmp, op_true);
13313 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13315 else if (op_true == CONST0_RTX (mode))
13317 op_false = force_reg (mode, op_false);
13318 x = gen_rtx_NOT (mode, cmp);
13319 x = gen_rtx_AND (mode, x, op_false);
13320 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13322 else if (TARGET_SSE5)
13324 rtx pcmov = gen_rtx_SET (mode, dest,
13325 gen_rtx_IF_THEN_ELSE (mode, cmp,
13332 op_true = force_reg (mode, op_true);
13333 op_false = force_reg (mode, op_false);
13335 t2 = gen_reg_rtx (mode);
13337 t3 = gen_reg_rtx (mode);
13341 x = gen_rtx_AND (mode, op_true, cmp);
13342 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13344 x = gen_rtx_NOT (mode, cmp);
13345 x = gen_rtx_AND (mode, x, op_false);
13346 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13348 x = gen_rtx_IOR (mode, t3, t2);
13349 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13353 /* Expand a floating-point conditional move. Return true if successful. */
13356 ix86_expand_fp_movcc (rtx operands[])
13358 enum machine_mode mode = GET_MODE (operands[0]);
13359 enum rtx_code code = GET_CODE (operands[1]);
13360 rtx tmp, compare_op, second_test, bypass_test;
13362 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13364 enum machine_mode cmode;
13366 /* Since we've no cmove for sse registers, don't force bad register
13367 allocation just to gain access to it. Deny movcc when the
13368 comparison mode doesn't match the move mode. */
13369 cmode = GET_MODE (ix86_compare_op0);
13370 if (cmode == VOIDmode)
13371 cmode = GET_MODE (ix86_compare_op1);
13375 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13377 &ix86_compare_op1);
13378 if (code == UNKNOWN)
13381 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13382 ix86_compare_op1, operands[2],
13386 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13387 ix86_compare_op1, operands[2], operands[3]);
13388 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13392 /* The floating point conditional move instructions don't directly
13393 support conditions resulting from a signed integer comparison. */
13395 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13397 /* The floating point conditional move instructions don't directly
13398 support signed integer comparisons. */
13400 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13402 gcc_assert (!second_test && !bypass_test);
13403 tmp = gen_reg_rtx (QImode);
13404 ix86_expand_setcc (code, tmp);
13406 ix86_compare_op0 = tmp;
13407 ix86_compare_op1 = const0_rtx;
13408 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13410 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13412 tmp = gen_reg_rtx (mode);
13413 emit_move_insn (tmp, operands[3]);
13416 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13418 tmp = gen_reg_rtx (mode);
13419 emit_move_insn (tmp, operands[2]);
13423 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13424 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13425 operands[2], operands[3])));
13427 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13428 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13429 operands[3], operands[0])));
13431 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13432 gen_rtx_IF_THEN_ELSE (mode, second_test,
13433 operands[2], operands[0])));
13438 /* Expand a floating-point vector conditional move; a vcond operation
13439 rather than a movcc operation. */
13442 ix86_expand_fp_vcond (rtx operands[])
13444 enum rtx_code code = GET_CODE (operands[3]);
13447 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13448 &operands[4], &operands[5]);
13449 if (code == UNKNOWN)
13452 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13453 operands[5], operands[1], operands[2]))
13456 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13457 operands[1], operands[2]);
13458 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13462 /* Expand a signed/unsigned integral vector conditional move. */
13465 ix86_expand_int_vcond (rtx operands[])
13467 enum machine_mode mode = GET_MODE (operands[0]);
13468 enum rtx_code code = GET_CODE (operands[3]);
13469 bool negate = false;
13472 cop0 = operands[4];
13473 cop1 = operands[5];
13475 /* SSE5 supports all of the comparisons on all vector int types. */
13478 /* Canonicalize the comparison to EQ, GT, GTU. */
13489 code = reverse_condition (code);
13495 code = reverse_condition (code);
13501 code = swap_condition (code);
13502 x = cop0, cop0 = cop1, cop1 = x;
13506 gcc_unreachable ();
13509 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13510 if (mode == V2DImode)
13515 /* SSE4.1 supports EQ. */
13516 if (!TARGET_SSE4_1)
13522 /* SSE4.2 supports GT/GTU. */
13523 if (!TARGET_SSE4_2)
13528 gcc_unreachable ();
13532 /* Unsigned parallel compare is not supported by the hardware. Play some
13533 tricks to turn this into a signed comparison against 0. */
13536 cop0 = force_reg (mode, cop0);
13545 /* Perform a parallel modulo subtraction. */
13546 t1 = gen_reg_rtx (mode);
13547 emit_insn ((mode == V4SImode
13549 : gen_subv2di3) (t1, cop0, cop1));
13551 /* Extract the original sign bit of op0. */
13552 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13554 t2 = gen_reg_rtx (mode);
13555 emit_insn ((mode == V4SImode
13557 : gen_andv2di3) (t2, cop0, mask));
13559 /* XOR it back into the result of the subtraction. This results
13560 in the sign bit set iff we saw unsigned underflow. */
13561 x = gen_reg_rtx (mode);
13562 emit_insn ((mode == V4SImode
13564 : gen_xorv2di3) (x, t1, t2));
13572 /* Perform a parallel unsigned saturating subtraction. */
13573 x = gen_reg_rtx (mode);
13574 emit_insn (gen_rtx_SET (VOIDmode, x,
13575 gen_rtx_US_MINUS (mode, cop0, cop1)));
13582 gcc_unreachable ();
13586 cop1 = CONST0_RTX (mode);
13590 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13591 operands[1+negate], operands[2-negate]);
13593 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13594 operands[2-negate]);
13598 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13599 true if we should do zero extension, else sign extension. HIGH_P is
13600 true if we want the N/2 high elements, else the low elements. */
13603 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13605 enum machine_mode imode = GET_MODE (operands[1]);
13606 rtx (*unpack)(rtx, rtx, rtx);
13613 unpack = gen_vec_interleave_highv16qi;
13615 unpack = gen_vec_interleave_lowv16qi;
13619 unpack = gen_vec_interleave_highv8hi;
13621 unpack = gen_vec_interleave_lowv8hi;
13625 unpack = gen_vec_interleave_highv4si;
13627 unpack = gen_vec_interleave_lowv4si;
13630 gcc_unreachable ();
13633 dest = gen_lowpart (imode, operands[0]);
13636 se = force_reg (imode, CONST0_RTX (imode));
13638 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13639 operands[1], pc_rtx, pc_rtx);
13641 emit_insn (unpack (dest, operands[1], se));
13644 /* This function performs the same task as ix86_expand_sse_unpack,
13645 but with SSE4.1 instructions. */
13648 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13650 enum machine_mode imode = GET_MODE (operands[1]);
13651 rtx (*unpack)(rtx, rtx);
13658 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13660 unpack = gen_sse4_1_extendv8qiv8hi2;
13664 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13666 unpack = gen_sse4_1_extendv4hiv4si2;
13670 unpack = gen_sse4_1_zero_extendv2siv2di2;
13672 unpack = gen_sse4_1_extendv2siv2di2;
13675 gcc_unreachable ();
13678 dest = operands[0];
13681 /* Shift higher 8 bytes to lower 8 bytes. */
13682 src = gen_reg_rtx (imode);
13683 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13684 gen_lowpart (TImode, operands[1]),
13690 emit_insn (unpack (dest, src));
13693 /* This function performs the same task as ix86_expand_sse_unpack,
13694 but with sse5 instructions. */
13697 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13699 enum machine_mode imode = GET_MODE (operands[1]);
13700 int pperm_bytes[16];
13702 int h = (high_p) ? 8 : 0;
13705 rtvec v = rtvec_alloc (16);
13708 rtx op0 = operands[0], op1 = operands[1];
13713 vs = rtvec_alloc (8);
13714 h2 = (high_p) ? 8 : 0;
13715 for (i = 0; i < 8; i++)
13717 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13718 pperm_bytes[2*i+1] = ((unsigned_p)
13720 : PPERM_SIGN | PPERM_SRC2 | i | h);
13723 for (i = 0; i < 16; i++)
13724 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13726 for (i = 0; i < 8; i++)
13727 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13729 p = gen_rtx_PARALLEL (VOIDmode, vs);
13730 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13732 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13734 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13738 vs = rtvec_alloc (4);
13739 h2 = (high_p) ? 4 : 0;
13740 for (i = 0; i < 4; i++)
13742 sign_extend = ((unsigned_p)
13744 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13745 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13746 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13747 pperm_bytes[4*i+2] = sign_extend;
13748 pperm_bytes[4*i+3] = sign_extend;
13751 for (i = 0; i < 16; i++)
13752 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13754 for (i = 0; i < 4; i++)
13755 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13757 p = gen_rtx_PARALLEL (VOIDmode, vs);
13758 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13760 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13762 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13766 vs = rtvec_alloc (2);
13767 h2 = (high_p) ? 2 : 0;
13768 for (i = 0; i < 2; i++)
13770 sign_extend = ((unsigned_p)
13772 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13773 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13774 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13775 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13776 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13777 pperm_bytes[8*i+4] = sign_extend;
13778 pperm_bytes[8*i+5] = sign_extend;
13779 pperm_bytes[8*i+6] = sign_extend;
13780 pperm_bytes[8*i+7] = sign_extend;
13783 for (i = 0; i < 16; i++)
13784 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13786 for (i = 0; i < 2; i++)
13787 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13789 p = gen_rtx_PARALLEL (VOIDmode, vs);
13790 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13792 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13794 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13798 gcc_unreachable ();
13804 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13805 next narrower integer vector type */
13807 ix86_expand_sse5_pack (rtx operands[3])
13809 enum machine_mode imode = GET_MODE (operands[0]);
13810 int pperm_bytes[16];
13812 rtvec v = rtvec_alloc (16);
13814 rtx op0 = operands[0];
13815 rtx op1 = operands[1];
13816 rtx op2 = operands[2];
13821 for (i = 0; i < 8; i++)
13823 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13824 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13827 for (i = 0; i < 16; i++)
13828 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13830 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13831 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13835 for (i = 0; i < 4; i++)
13837 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13838 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13839 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13840 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13843 for (i = 0; i < 16; i++)
13844 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13846 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13847 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13851 for (i = 0; i < 2; i++)
13853 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13854 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13855 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13856 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13857 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13858 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13859 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13860 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13863 for (i = 0; i < 16; i++)
13864 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13866 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13867 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13871 gcc_unreachable ();
13877 /* Expand conditional increment or decrement using adb/sbb instructions.
13878 The default case using setcc followed by the conditional move can be
13879 done by generic code. */
13881 ix86_expand_int_addcc (rtx operands[])
13883 enum rtx_code code = GET_CODE (operands[1]);
13885 rtx val = const0_rtx;
13886 bool fpcmp = false;
13887 enum machine_mode mode = GET_MODE (operands[0]);
13889 if (operands[3] != const1_rtx
13890 && operands[3] != constm1_rtx)
13892 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13893 ix86_compare_op1, &compare_op))
13895 code = GET_CODE (compare_op);
13897 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13898 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13901 code = ix86_fp_compare_code_to_integer (code);
13908 PUT_CODE (compare_op,
13909 reverse_condition_maybe_unordered
13910 (GET_CODE (compare_op)));
13912 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13914 PUT_MODE (compare_op, mode);
13916 /* Construct either adc or sbb insn. */
13917 if ((code == LTU) == (operands[3] == constm1_rtx))
13919 switch (GET_MODE (operands[0]))
13922 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13925 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13928 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13931 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13934 gcc_unreachable ();
13939 switch (GET_MODE (operands[0]))
13942 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13945 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13948 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13951 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13954 gcc_unreachable ();
13957 return 1; /* DONE */
13961 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13962 works for floating pointer parameters and nonoffsetable memories.
13963 For pushes, it returns just stack offsets; the values will be saved
13964 in the right order. Maximally three parts are generated. */
13967 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13972 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13974 size = (GET_MODE_SIZE (mode) + 4) / 8;
13976 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13977 gcc_assert (size >= 2 && size <= 3);
13979 /* Optimize constant pool reference to immediates. This is used by fp
13980 moves, that force all constants to memory to allow combining. */
13981 if (MEM_P (operand) && MEM_READONLY_P (operand))
13983 rtx tmp = maybe_get_pool_constant (operand);
13988 if (MEM_P (operand) && !offsettable_memref_p (operand))
13990 /* The only non-offsetable memories we handle are pushes. */
13991 int ok = push_operand (operand, VOIDmode);
13995 operand = copy_rtx (operand);
13996 PUT_MODE (operand, Pmode);
13997 parts[0] = parts[1] = parts[2] = operand;
14001 if (GET_CODE (operand) == CONST_VECTOR)
14003 enum machine_mode imode = int_mode_for_mode (mode);
14004 /* Caution: if we looked through a constant pool memory above,
14005 the operand may actually have a different mode now. That's
14006 ok, since we want to pun this all the way back to an integer. */
14007 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14008 gcc_assert (operand != NULL);
14014 if (mode == DImode)
14015 split_di (&operand, 1, &parts[0], &parts[1]);
14018 if (REG_P (operand))
14020 gcc_assert (reload_completed);
14021 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
14022 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
14024 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
14026 else if (offsettable_memref_p (operand))
14028 operand = adjust_address (operand, SImode, 0);
14029 parts[0] = operand;
14030 parts[1] = adjust_address (operand, SImode, 4);
14032 parts[2] = adjust_address (operand, SImode, 8);
14034 else if (GET_CODE (operand) == CONST_DOUBLE)
14039 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14043 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14044 parts[2] = gen_int_mode (l[2], SImode);
14047 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14050 gcc_unreachable ();
14052 parts[1] = gen_int_mode (l[1], SImode);
14053 parts[0] = gen_int_mode (l[0], SImode);
14056 gcc_unreachable ();
14061 if (mode == TImode)
14062 split_ti (&operand, 1, &parts[0], &parts[1]);
14063 if (mode == XFmode || mode == TFmode)
14065 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14066 if (REG_P (operand))
14068 gcc_assert (reload_completed);
14069 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14070 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14072 else if (offsettable_memref_p (operand))
14074 operand = adjust_address (operand, DImode, 0);
14075 parts[0] = operand;
14076 parts[1] = adjust_address (operand, upper_mode, 8);
14078 else if (GET_CODE (operand) == CONST_DOUBLE)
14083 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14084 real_to_target (l, &r, mode);
14086 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14087 if (HOST_BITS_PER_WIDE_INT >= 64)
14090 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14091 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14094 parts[0] = immed_double_const (l[0], l[1], DImode);
14096 if (upper_mode == SImode)
14097 parts[1] = gen_int_mode (l[2], SImode);
14098 else if (HOST_BITS_PER_WIDE_INT >= 64)
14101 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14102 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14105 parts[1] = immed_double_const (l[2], l[3], DImode);
14108 gcc_unreachable ();
14115 /* Emit insns to perform a move or push of DI, DF, and XF values.
14116 Return false when normal moves are needed; true when all required
14117 insns have been emitted. Operands 2-4 contain the input values
14118 int the correct order; operands 5-7 contain the output values. */
14121 ix86_split_long_move (rtx operands[])
14126 int collisions = 0;
14127 enum machine_mode mode = GET_MODE (operands[0]);
14129 /* The DFmode expanders may ask us to move double.
14130 For 64bit target this is single move. By hiding the fact
14131 here we simplify i386.md splitters. */
14132 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14134 /* Optimize constant pool reference to immediates. This is used by
14135 fp moves, that force all constants to memory to allow combining. */
14137 if (MEM_P (operands[1])
14138 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14139 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14140 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14141 if (push_operand (operands[0], VOIDmode))
14143 operands[0] = copy_rtx (operands[0]);
14144 PUT_MODE (operands[0], Pmode);
14147 operands[0] = gen_lowpart (DImode, operands[0]);
14148 operands[1] = gen_lowpart (DImode, operands[1]);
14149 emit_move_insn (operands[0], operands[1]);
14153 /* The only non-offsettable memory we handle is push. */
14154 if (push_operand (operands[0], VOIDmode))
14157 gcc_assert (!MEM_P (operands[0])
14158 || offsettable_memref_p (operands[0]));
14160 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14161 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14163 /* When emitting push, take care for source operands on the stack. */
14164 if (push && MEM_P (operands[1])
14165 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14168 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14169 XEXP (part[1][2], 0));
14170 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14171 XEXP (part[1][1], 0));
14174 /* We need to do copy in the right order in case an address register
14175 of the source overlaps the destination. */
14176 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14178 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14180 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14183 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14186 /* Collision in the middle part can be handled by reordering. */
14187 if (collisions == 1 && nparts == 3
14188 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14191 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14192 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14195 /* If there are more collisions, we can't handle it by reordering.
14196 Do an lea to the last part and use only one colliding move. */
14197 else if (collisions > 1)
14203 base = part[0][nparts - 1];
14205 /* Handle the case when the last part isn't valid for lea.
14206 Happens in 64-bit mode storing the 12-byte XFmode. */
14207 if (GET_MODE (base) != Pmode)
14208 base = gen_rtx_REG (Pmode, REGNO (base));
14210 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14211 part[1][0] = replace_equiv_address (part[1][0], base);
14212 part[1][1] = replace_equiv_address (part[1][1],
14213 plus_constant (base, UNITS_PER_WORD));
14215 part[1][2] = replace_equiv_address (part[1][2],
14216 plus_constant (base, 8));
14226 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14227 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14228 emit_move_insn (part[0][2], part[1][2]);
14233 /* In 64bit mode we don't have 32bit push available. In case this is
14234 register, it is OK - we will just use larger counterpart. We also
14235 retype memory - these comes from attempt to avoid REX prefix on
14236 moving of second half of TFmode value. */
14237 if (GET_MODE (part[1][1]) == SImode)
14239 switch (GET_CODE (part[1][1]))
14242 part[1][1] = adjust_address (part[1][1], DImode, 0);
14246 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14250 gcc_unreachable ();
14253 if (GET_MODE (part[1][0]) == SImode)
14254 part[1][0] = part[1][1];
14257 emit_move_insn (part[0][1], part[1][1]);
14258 emit_move_insn (part[0][0], part[1][0]);
14262 /* Choose correct order to not overwrite the source before it is copied. */
14263 if ((REG_P (part[0][0])
14264 && REG_P (part[1][1])
14265 && (REGNO (part[0][0]) == REGNO (part[1][1])
14267 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14269 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14273 operands[2] = part[0][2];
14274 operands[3] = part[0][1];
14275 operands[4] = part[0][0];
14276 operands[5] = part[1][2];
14277 operands[6] = part[1][1];
14278 operands[7] = part[1][0];
14282 operands[2] = part[0][1];
14283 operands[3] = part[0][0];
14284 operands[5] = part[1][1];
14285 operands[6] = part[1][0];
14292 operands[2] = part[0][0];
14293 operands[3] = part[0][1];
14294 operands[4] = part[0][2];
14295 operands[5] = part[1][0];
14296 operands[6] = part[1][1];
14297 operands[7] = part[1][2];
14301 operands[2] = part[0][0];
14302 operands[3] = part[0][1];
14303 operands[5] = part[1][0];
14304 operands[6] = part[1][1];
14308 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14311 if (CONST_INT_P (operands[5])
14312 && operands[5] != const0_rtx
14313 && REG_P (operands[2]))
14315 if (CONST_INT_P (operands[6])
14316 && INTVAL (operands[6]) == INTVAL (operands[5]))
14317 operands[6] = operands[2];
14320 && CONST_INT_P (operands[7])
14321 && INTVAL (operands[7]) == INTVAL (operands[5]))
14322 operands[7] = operands[2];
14326 && CONST_INT_P (operands[6])
14327 && operands[6] != const0_rtx
14328 && REG_P (operands[3])
14329 && CONST_INT_P (operands[7])
14330 && INTVAL (operands[7]) == INTVAL (operands[6]))
14331 operands[7] = operands[3];
14334 emit_move_insn (operands[2], operands[5]);
14335 emit_move_insn (operands[3], operands[6]);
14337 emit_move_insn (operands[4], operands[7]);
14342 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14343 left shift by a constant, either using a single shift or
14344 a sequence of add instructions. */
14347 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14351 emit_insn ((mode == DImode
14353 : gen_adddi3) (operand, operand, operand));
14355 else if (!optimize_size
14356 && count * ix86_cost->add <= ix86_cost->shift_const)
14359 for (i=0; i<count; i++)
14361 emit_insn ((mode == DImode
14363 : gen_adddi3) (operand, operand, operand));
14367 emit_insn ((mode == DImode
14369 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14373 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14375 rtx low[2], high[2];
14377 const int single_width = mode == DImode ? 32 : 64;
14379 if (CONST_INT_P (operands[2]))
14381 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14382 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14384 if (count >= single_width)
14386 emit_move_insn (high[0], low[1]);
14387 emit_move_insn (low[0], const0_rtx);
14389 if (count > single_width)
14390 ix86_expand_ashl_const (high[0], count - single_width, mode);
14394 if (!rtx_equal_p (operands[0], operands[1]))
14395 emit_move_insn (operands[0], operands[1]);
14396 emit_insn ((mode == DImode
14398 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14399 ix86_expand_ashl_const (low[0], count, mode);
14404 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14406 if (operands[1] == const1_rtx)
14408 /* Assuming we've chosen a QImode capable registers, then 1 << N
14409 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14410 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14412 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14414 ix86_expand_clear (low[0]);
14415 ix86_expand_clear (high[0]);
14416 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14418 d = gen_lowpart (QImode, low[0]);
14419 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14420 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14421 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14423 d = gen_lowpart (QImode, high[0]);
14424 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14425 s = gen_rtx_NE (QImode, flags, const0_rtx);
14426 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14429 /* Otherwise, we can get the same results by manually performing
14430 a bit extract operation on bit 5/6, and then performing the two
14431 shifts. The two methods of getting 0/1 into low/high are exactly
14432 the same size. Avoiding the shift in the bit extract case helps
14433 pentium4 a bit; no one else seems to care much either way. */
14438 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14439 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14441 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14442 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14444 emit_insn ((mode == DImode
14446 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14447 emit_insn ((mode == DImode
14449 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14450 emit_move_insn (low[0], high[0]);
14451 emit_insn ((mode == DImode
14453 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14456 emit_insn ((mode == DImode
14458 : gen_ashldi3) (low[0], low[0], operands[2]));
14459 emit_insn ((mode == DImode
14461 : gen_ashldi3) (high[0], high[0], operands[2]));
14465 if (operands[1] == constm1_rtx)
14467 /* For -1 << N, we can avoid the shld instruction, because we
14468 know that we're shifting 0...31/63 ones into a -1. */
14469 emit_move_insn (low[0], constm1_rtx);
14471 emit_move_insn (high[0], low[0]);
14473 emit_move_insn (high[0], constm1_rtx);
14477 if (!rtx_equal_p (operands[0], operands[1]))
14478 emit_move_insn (operands[0], operands[1]);
14480 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14481 emit_insn ((mode == DImode
14483 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14486 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14488 if (TARGET_CMOVE && scratch)
14490 ix86_expand_clear (scratch);
14491 emit_insn ((mode == DImode
14492 ? gen_x86_shift_adj_1
14493 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14496 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14500 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14502 rtx low[2], high[2];
14504 const int single_width = mode == DImode ? 32 : 64;
14506 if (CONST_INT_P (operands[2]))
14508 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14509 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14511 if (count == single_width * 2 - 1)
14513 emit_move_insn (high[0], high[1]);
14514 emit_insn ((mode == DImode
14516 : gen_ashrdi3) (high[0], high[0],
14517 GEN_INT (single_width - 1)));
14518 emit_move_insn (low[0], high[0]);
14521 else if (count >= single_width)
14523 emit_move_insn (low[0], high[1]);
14524 emit_move_insn (high[0], low[0]);
14525 emit_insn ((mode == DImode
14527 : gen_ashrdi3) (high[0], high[0],
14528 GEN_INT (single_width - 1)));
14529 if (count > single_width)
14530 emit_insn ((mode == DImode
14532 : gen_ashrdi3) (low[0], low[0],
14533 GEN_INT (count - single_width)));
14537 if (!rtx_equal_p (operands[0], operands[1]))
14538 emit_move_insn (operands[0], operands[1]);
14539 emit_insn ((mode == DImode
14541 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14542 emit_insn ((mode == DImode
14544 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14549 if (!rtx_equal_p (operands[0], operands[1]))
14550 emit_move_insn (operands[0], operands[1]);
14552 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14554 emit_insn ((mode == DImode
14556 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14557 emit_insn ((mode == DImode
14559 : gen_ashrdi3) (high[0], high[0], operands[2]));
14561 if (TARGET_CMOVE && scratch)
14563 emit_move_insn (scratch, high[0]);
14564 emit_insn ((mode == DImode
14566 : gen_ashrdi3) (scratch, scratch,
14567 GEN_INT (single_width - 1)));
14568 emit_insn ((mode == DImode
14569 ? gen_x86_shift_adj_1
14570 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14574 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14579 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14581 rtx low[2], high[2];
14583 const int single_width = mode == DImode ? 32 : 64;
14585 if (CONST_INT_P (operands[2]))
14587 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14588 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14590 if (count >= single_width)
14592 emit_move_insn (low[0], high[1]);
14593 ix86_expand_clear (high[0]);
14595 if (count > single_width)
14596 emit_insn ((mode == DImode
14598 : gen_lshrdi3) (low[0], low[0],
14599 GEN_INT (count - single_width)));
14603 if (!rtx_equal_p (operands[0], operands[1]))
14604 emit_move_insn (operands[0], operands[1]);
14605 emit_insn ((mode == DImode
14607 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14608 emit_insn ((mode == DImode
14610 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14615 if (!rtx_equal_p (operands[0], operands[1]))
14616 emit_move_insn (operands[0], operands[1]);
14618 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14620 emit_insn ((mode == DImode
14622 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14623 emit_insn ((mode == DImode
14625 : gen_lshrdi3) (high[0], high[0], operands[2]));
14627 /* Heh. By reversing the arguments, we can reuse this pattern. */
14628 if (TARGET_CMOVE && scratch)
14630 ix86_expand_clear (scratch);
14631 emit_insn ((mode == DImode
14632 ? gen_x86_shift_adj_1
14633 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14637 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14641 /* Predict just emitted jump instruction to be taken with probability PROB. */
14643 predict_jump (int prob)
14645 rtx insn = get_last_insn ();
14646 gcc_assert (JUMP_P (insn));
14648 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14653 /* Helper function for the string operations below. Dest VARIABLE whether
14654 it is aligned to VALUE bytes. If true, jump to the label. */
14656 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14658 rtx label = gen_label_rtx ();
14659 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14660 if (GET_MODE (variable) == DImode)
14661 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14663 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14664 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14667 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14669 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14673 /* Adjust COUNTER by the VALUE. */
14675 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14677 if (GET_MODE (countreg) == DImode)
14678 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14680 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14683 /* Zero extend possibly SImode EXP to Pmode register. */
14685 ix86_zero_extend_to_Pmode (rtx exp)
14688 if (GET_MODE (exp) == VOIDmode)
14689 return force_reg (Pmode, exp);
14690 if (GET_MODE (exp) == Pmode)
14691 return copy_to_mode_reg (Pmode, exp);
14692 r = gen_reg_rtx (Pmode);
14693 emit_insn (gen_zero_extendsidi2 (r, exp));
14697 /* Divide COUNTREG by SCALE. */
14699 scale_counter (rtx countreg, int scale)
14702 rtx piece_size_mask;
14706 if (CONST_INT_P (countreg))
14707 return GEN_INT (INTVAL (countreg) / scale);
14708 gcc_assert (REG_P (countreg));
14710 piece_size_mask = GEN_INT (scale - 1);
14711 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14712 GEN_INT (exact_log2 (scale)),
14713 NULL, 1, OPTAB_DIRECT);
14717 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14718 DImode for constant loop counts. */
14720 static enum machine_mode
14721 counter_mode (rtx count_exp)
14723 if (GET_MODE (count_exp) != VOIDmode)
14724 return GET_MODE (count_exp);
14725 if (GET_CODE (count_exp) != CONST_INT)
14727 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14732 /* When SRCPTR is non-NULL, output simple loop to move memory
14733 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14734 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14735 equivalent loop to set memory by VALUE (supposed to be in MODE).
14737 The size is rounded down to whole number of chunk size moved at once.
14738 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14742 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14743 rtx destptr, rtx srcptr, rtx value,
14744 rtx count, enum machine_mode mode, int unroll,
14747 rtx out_label, top_label, iter, tmp;
14748 enum machine_mode iter_mode = counter_mode (count);
14749 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14750 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14756 top_label = gen_label_rtx ();
14757 out_label = gen_label_rtx ();
14758 iter = gen_reg_rtx (iter_mode);
14760 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14761 NULL, 1, OPTAB_DIRECT);
14762 /* Those two should combine. */
14763 if (piece_size == const1_rtx)
14765 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14767 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14769 emit_move_insn (iter, const0_rtx);
14771 emit_label (top_label);
14773 tmp = convert_modes (Pmode, iter_mode, iter, true);
14774 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14775 destmem = change_address (destmem, mode, x_addr);
14779 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14780 srcmem = change_address (srcmem, mode, y_addr);
14782 /* When unrolling for chips that reorder memory reads and writes,
14783 we can save registers by using single temporary.
14784 Also using 4 temporaries is overkill in 32bit mode. */
14785 if (!TARGET_64BIT && 0)
14787 for (i = 0; i < unroll; i++)
14792 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14794 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14796 emit_move_insn (destmem, srcmem);
14802 gcc_assert (unroll <= 4);
14803 for (i = 0; i < unroll; i++)
14805 tmpreg[i] = gen_reg_rtx (mode);
14809 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14811 emit_move_insn (tmpreg[i], srcmem);
14813 for (i = 0; i < unroll; i++)
14818 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14820 emit_move_insn (destmem, tmpreg[i]);
14825 for (i = 0; i < unroll; i++)
14829 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14830 emit_move_insn (destmem, value);
14833 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14834 true, OPTAB_LIB_WIDEN);
14836 emit_move_insn (iter, tmp);
14838 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14840 if (expected_size != -1)
14842 expected_size /= GET_MODE_SIZE (mode) * unroll;
14843 if (expected_size == 0)
14845 else if (expected_size > REG_BR_PROB_BASE)
14846 predict_jump (REG_BR_PROB_BASE - 1);
14848 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14851 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14852 iter = ix86_zero_extend_to_Pmode (iter);
14853 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14854 true, OPTAB_LIB_WIDEN);
14855 if (tmp != destptr)
14856 emit_move_insn (destptr, tmp);
14859 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14860 true, OPTAB_LIB_WIDEN);
14862 emit_move_insn (srcptr, tmp);
14864 emit_label (out_label);
14867 /* Output "rep; mov" instruction.
14868 Arguments have same meaning as for previous function */
14870 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14871 rtx destptr, rtx srcptr,
14873 enum machine_mode mode)
14879 /* If the size is known, it is shorter to use rep movs. */
14880 if (mode == QImode && CONST_INT_P (count)
14881 && !(INTVAL (count) & 3))
14884 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14885 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14886 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14887 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14888 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14889 if (mode != QImode)
14891 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14892 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14893 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14894 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14895 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14896 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14900 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14901 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14903 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14907 /* Output "rep; stos" instruction.
14908 Arguments have same meaning as for previous function */
14910 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14912 enum machine_mode mode)
14917 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14918 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14919 value = force_reg (mode, gen_lowpart (mode, value));
14920 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14921 if (mode != QImode)
14923 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14924 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14925 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14928 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14929 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14933 emit_strmov (rtx destmem, rtx srcmem,
14934 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14936 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14937 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14938 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14941 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14943 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14944 rtx destptr, rtx srcptr, rtx count, int max_size)
14947 if (CONST_INT_P (count))
14949 HOST_WIDE_INT countval = INTVAL (count);
14952 if ((countval & 0x10) && max_size > 16)
14956 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14957 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14960 gcc_unreachable ();
14963 if ((countval & 0x08) && max_size > 8)
14966 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14969 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14970 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14974 if ((countval & 0x04) && max_size > 4)
14976 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14979 if ((countval & 0x02) && max_size > 2)
14981 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14984 if ((countval & 0x01) && max_size > 1)
14986 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14993 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14994 count, 1, OPTAB_DIRECT);
14995 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14996 count, QImode, 1, 4);
15000 /* When there are stringops, we can cheaply increase dest and src pointers.
15001 Otherwise we save code size by maintaining offset (zero is readily
15002 available from preceding rep operation) and using x86 addressing modes.
15004 if (TARGET_SINGLE_STRINGOP)
15008 rtx label = ix86_expand_aligntest (count, 4, true);
15009 src = change_address (srcmem, SImode, srcptr);
15010 dest = change_address (destmem, SImode, destptr);
15011 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15012 emit_label (label);
15013 LABEL_NUSES (label) = 1;
15017 rtx label = ix86_expand_aligntest (count, 2, true);
15018 src = change_address (srcmem, HImode, srcptr);
15019 dest = change_address (destmem, HImode, destptr);
15020 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15021 emit_label (label);
15022 LABEL_NUSES (label) = 1;
15026 rtx label = ix86_expand_aligntest (count, 1, true);
15027 src = change_address (srcmem, QImode, srcptr);
15028 dest = change_address (destmem, QImode, destptr);
15029 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15030 emit_label (label);
15031 LABEL_NUSES (label) = 1;
15036 rtx offset = force_reg (Pmode, const0_rtx);
15041 rtx label = ix86_expand_aligntest (count, 4, true);
15042 src = change_address (srcmem, SImode, srcptr);
15043 dest = change_address (destmem, SImode, destptr);
15044 emit_move_insn (dest, src);
15045 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15046 true, OPTAB_LIB_WIDEN);
15048 emit_move_insn (offset, tmp);
15049 emit_label (label);
15050 LABEL_NUSES (label) = 1;
15054 rtx label = ix86_expand_aligntest (count, 2, true);
15055 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15056 src = change_address (srcmem, HImode, tmp);
15057 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15058 dest = change_address (destmem, HImode, tmp);
15059 emit_move_insn (dest, src);
15060 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15061 true, OPTAB_LIB_WIDEN);
15063 emit_move_insn (offset, tmp);
15064 emit_label (label);
15065 LABEL_NUSES (label) = 1;
15069 rtx label = ix86_expand_aligntest (count, 1, true);
15070 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15071 src = change_address (srcmem, QImode, tmp);
15072 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15073 dest = change_address (destmem, QImode, tmp);
15074 emit_move_insn (dest, src);
15075 emit_label (label);
15076 LABEL_NUSES (label) = 1;
15081 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15083 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15084 rtx count, int max_size)
15087 expand_simple_binop (counter_mode (count), AND, count,
15088 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15089 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15090 gen_lowpart (QImode, value), count, QImode,
15094 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15096 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15100 if (CONST_INT_P (count))
15102 HOST_WIDE_INT countval = INTVAL (count);
15105 if ((countval & 0x10) && max_size > 16)
15109 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15110 emit_insn (gen_strset (destptr, dest, value));
15111 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15112 emit_insn (gen_strset (destptr, dest, value));
15115 gcc_unreachable ();
15118 if ((countval & 0x08) && max_size > 8)
15122 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15123 emit_insn (gen_strset (destptr, dest, value));
15127 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15128 emit_insn (gen_strset (destptr, dest, value));
15129 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15130 emit_insn (gen_strset (destptr, dest, value));
15134 if ((countval & 0x04) && max_size > 4)
15136 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15137 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15140 if ((countval & 0x02) && max_size > 2)
15142 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15143 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15146 if ((countval & 0x01) && max_size > 1)
15148 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15149 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15156 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15161 rtx label = ix86_expand_aligntest (count, 16, true);
15164 dest = change_address (destmem, DImode, destptr);
15165 emit_insn (gen_strset (destptr, dest, value));
15166 emit_insn (gen_strset (destptr, dest, value));
15170 dest = change_address (destmem, SImode, destptr);
15171 emit_insn (gen_strset (destptr, dest, value));
15172 emit_insn (gen_strset (destptr, dest, value));
15173 emit_insn (gen_strset (destptr, dest, value));
15174 emit_insn (gen_strset (destptr, dest, value));
15176 emit_label (label);
15177 LABEL_NUSES (label) = 1;
15181 rtx label = ix86_expand_aligntest (count, 8, true);
15184 dest = change_address (destmem, DImode, destptr);
15185 emit_insn (gen_strset (destptr, dest, value));
15189 dest = change_address (destmem, SImode, destptr);
15190 emit_insn (gen_strset (destptr, dest, value));
15191 emit_insn (gen_strset (destptr, dest, value));
15193 emit_label (label);
15194 LABEL_NUSES (label) = 1;
15198 rtx label = ix86_expand_aligntest (count, 4, true);
15199 dest = change_address (destmem, SImode, destptr);
15200 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15201 emit_label (label);
15202 LABEL_NUSES (label) = 1;
15206 rtx label = ix86_expand_aligntest (count, 2, true);
15207 dest = change_address (destmem, HImode, destptr);
15208 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15209 emit_label (label);
15210 LABEL_NUSES (label) = 1;
15214 rtx label = ix86_expand_aligntest (count, 1, true);
15215 dest = change_address (destmem, QImode, destptr);
15216 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15217 emit_label (label);
15218 LABEL_NUSES (label) = 1;
15222 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15223 DESIRED_ALIGNMENT. */
15225 expand_movmem_prologue (rtx destmem, rtx srcmem,
15226 rtx destptr, rtx srcptr, rtx count,
15227 int align, int desired_alignment)
15229 if (align <= 1 && desired_alignment > 1)
15231 rtx label = ix86_expand_aligntest (destptr, 1, false);
15232 srcmem = change_address (srcmem, QImode, srcptr);
15233 destmem = change_address (destmem, QImode, destptr);
15234 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15235 ix86_adjust_counter (count, 1);
15236 emit_label (label);
15237 LABEL_NUSES (label) = 1;
15239 if (align <= 2 && desired_alignment > 2)
15241 rtx label = ix86_expand_aligntest (destptr, 2, false);
15242 srcmem = change_address (srcmem, HImode, srcptr);
15243 destmem = change_address (destmem, HImode, destptr);
15244 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15245 ix86_adjust_counter (count, 2);
15246 emit_label (label);
15247 LABEL_NUSES (label) = 1;
15249 if (align <= 4 && desired_alignment > 4)
15251 rtx label = ix86_expand_aligntest (destptr, 4, false);
15252 srcmem = change_address (srcmem, SImode, srcptr);
15253 destmem = change_address (destmem, SImode, destptr);
15254 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15255 ix86_adjust_counter (count, 4);
15256 emit_label (label);
15257 LABEL_NUSES (label) = 1;
15259 gcc_assert (desired_alignment <= 8);
15262 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15263 DESIRED_ALIGNMENT. */
15265 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15266 int align, int desired_alignment)
15268 if (align <= 1 && desired_alignment > 1)
15270 rtx label = ix86_expand_aligntest (destptr, 1, false);
15271 destmem = change_address (destmem, QImode, destptr);
15272 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15273 ix86_adjust_counter (count, 1);
15274 emit_label (label);
15275 LABEL_NUSES (label) = 1;
15277 if (align <= 2 && desired_alignment > 2)
15279 rtx label = ix86_expand_aligntest (destptr, 2, false);
15280 destmem = change_address (destmem, HImode, destptr);
15281 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15282 ix86_adjust_counter (count, 2);
15283 emit_label (label);
15284 LABEL_NUSES (label) = 1;
15286 if (align <= 4 && desired_alignment > 4)
15288 rtx label = ix86_expand_aligntest (destptr, 4, false);
15289 destmem = change_address (destmem, SImode, destptr);
15290 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15291 ix86_adjust_counter (count, 4);
15292 emit_label (label);
15293 LABEL_NUSES (label) = 1;
15295 gcc_assert (desired_alignment <= 8);
15298 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15299 static enum stringop_alg
15300 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15301 int *dynamic_check)
15303 const struct stringop_algs * algs;
15304 /* Algorithms using the rep prefix want at least edi and ecx;
15305 additionally, memset wants eax and memcpy wants esi. Don't
15306 consider such algorithms if the user has appropriated those
15307 registers for their own purposes. */
15308 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15310 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15312 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15313 || (alg != rep_prefix_1_byte \
15314 && alg != rep_prefix_4_byte \
15315 && alg != rep_prefix_8_byte))
15317 *dynamic_check = -1;
15319 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15321 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15322 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15323 return stringop_alg;
15324 /* rep; movq or rep; movl is the smallest variant. */
15325 else if (optimize_size)
15327 if (!count || (count & 3))
15328 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15330 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15332 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15334 else if (expected_size != -1 && expected_size < 4)
15335 return loop_1_byte;
15336 else if (expected_size != -1)
15339 enum stringop_alg alg = libcall;
15340 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15342 /* We get here if the algorithms that were not libcall-based
15343 were rep-prefix based and we are unable to use rep prefixes
15344 based on global register usage. Break out of the loop and
15345 use the heuristic below. */
15346 if (algs->size[i].max == 0)
15348 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15350 enum stringop_alg candidate = algs->size[i].alg;
15352 if (candidate != libcall && ALG_USABLE_P (candidate))
15354 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15355 last non-libcall inline algorithm. */
15356 if (TARGET_INLINE_ALL_STRINGOPS)
15358 /* When the current size is best to be copied by a libcall,
15359 but we are still forced to inline, run the heuristic below
15360 that will pick code for medium sized blocks. */
15361 if (alg != libcall)
15365 else if (ALG_USABLE_P (candidate))
15369 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15371 /* When asked to inline the call anyway, try to pick meaningful choice.
15372 We look for maximal size of block that is faster to copy by hand and
15373 take blocks of at most of that size guessing that average size will
15374 be roughly half of the block.
15376 If this turns out to be bad, we might simply specify the preferred
15377 choice in ix86_costs. */
15378 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15379 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15382 enum stringop_alg alg;
15384 bool any_alg_usable_p = true;
15386 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15388 enum stringop_alg candidate = algs->size[i].alg;
15389 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15391 if (candidate != libcall && candidate
15392 && ALG_USABLE_P (candidate))
15393 max = algs->size[i].max;
15395 /* If there aren't any usable algorithms, then recursing on
15396 smaller sizes isn't going to find anything. Just return the
15397 simple byte-at-a-time copy loop. */
15398 if (!any_alg_usable_p)
15400 /* Pick something reasonable. */
15401 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15402 *dynamic_check = 128;
15403 return loop_1_byte;
15407 alg = decide_alg (count, max / 2, memset, dynamic_check);
15408 gcc_assert (*dynamic_check == -1);
15409 gcc_assert (alg != libcall);
15410 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15411 *dynamic_check = max;
15414 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15415 #undef ALG_USABLE_P
15418 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15419 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15421 decide_alignment (int align,
15422 enum stringop_alg alg,
15425 int desired_align = 0;
15429 gcc_unreachable ();
15431 case unrolled_loop:
15432 desired_align = GET_MODE_SIZE (Pmode);
15434 case rep_prefix_8_byte:
15437 case rep_prefix_4_byte:
15438 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15439 copying whole cacheline at once. */
15440 if (TARGET_PENTIUMPRO)
15445 case rep_prefix_1_byte:
15446 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15447 copying whole cacheline at once. */
15448 if (TARGET_PENTIUMPRO)
15462 if (desired_align < align)
15463 desired_align = align;
15464 if (expected_size != -1 && expected_size < 4)
15465 desired_align = align;
15466 return desired_align;
15469 /* Return the smallest power of 2 greater than VAL. */
15471 smallest_pow2_greater_than (int val)
15479 /* Expand string move (memcpy) operation. Use i386 string operations when
15480 profitable. expand_setmem contains similar code. The code depends upon
15481 architecture, block size and alignment, but always has the same
15484 1) Prologue guard: Conditional that jumps up to epilogues for small
15485 blocks that can be handled by epilogue alone. This is faster but
15486 also needed for correctness, since prologue assume the block is larger
15487 than the desired alignment.
15489 Optional dynamic check for size and libcall for large
15490 blocks is emitted here too, with -minline-stringops-dynamically.
15492 2) Prologue: copy first few bytes in order to get destination aligned
15493 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15494 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15495 We emit either a jump tree on power of two sized blocks, or a byte loop.
15497 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15498 with specified algorithm.
15500 4) Epilogue: code copying tail of the block that is too small to be
15501 handled by main body (or up to size guarded by prologue guard). */
15504 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15505 rtx expected_align_exp, rtx expected_size_exp)
15511 rtx jump_around_label = NULL;
15512 HOST_WIDE_INT align = 1;
15513 unsigned HOST_WIDE_INT count = 0;
15514 HOST_WIDE_INT expected_size = -1;
15515 int size_needed = 0, epilogue_size_needed;
15516 int desired_align = 0;
15517 enum stringop_alg alg;
15520 if (CONST_INT_P (align_exp))
15521 align = INTVAL (align_exp);
15522 /* i386 can do misaligned access on reasonably increased cost. */
15523 if (CONST_INT_P (expected_align_exp)
15524 && INTVAL (expected_align_exp) > align)
15525 align = INTVAL (expected_align_exp);
15526 if (CONST_INT_P (count_exp))
15527 count = expected_size = INTVAL (count_exp);
15528 if (CONST_INT_P (expected_size_exp) && count == 0)
15529 expected_size = INTVAL (expected_size_exp);
15531 /* Make sure we don't need to care about overflow later on. */
15532 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15535 /* Step 0: Decide on preferred algorithm, desired alignment and
15536 size of chunks to be copied by main loop. */
15538 alg = decide_alg (count, expected_size, false, &dynamic_check);
15539 desired_align = decide_alignment (align, alg, expected_size);
15541 if (!TARGET_ALIGN_STRINGOPS)
15542 align = desired_align;
15544 if (alg == libcall)
15546 gcc_assert (alg != no_stringop);
15548 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15549 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15550 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15555 gcc_unreachable ();
15557 size_needed = GET_MODE_SIZE (Pmode);
15559 case unrolled_loop:
15560 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15562 case rep_prefix_8_byte:
15565 case rep_prefix_4_byte:
15568 case rep_prefix_1_byte:
15574 epilogue_size_needed = size_needed;
15576 /* Step 1: Prologue guard. */
15578 /* Alignment code needs count to be in register. */
15579 if (CONST_INT_P (count_exp) && desired_align > align)
15580 count_exp = force_reg (counter_mode (count_exp), count_exp);
15581 gcc_assert (desired_align >= 1 && align >= 1);
15583 /* Ensure that alignment prologue won't copy past end of block. */
15584 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15586 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15587 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15588 Make sure it is power of 2. */
15589 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15591 if (CONST_INT_P (count_exp))
15593 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15598 label = gen_label_rtx ();
15599 emit_cmp_and_jump_insns (count_exp,
15600 GEN_INT (epilogue_size_needed),
15601 LTU, 0, counter_mode (count_exp), 1, label);
15602 if (expected_size == -1 || expected_size < epilogue_size_needed)
15603 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15605 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15609 /* Emit code to decide on runtime whether library call or inline should be
15611 if (dynamic_check != -1)
15613 if (CONST_INT_P (count_exp))
15615 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15617 emit_block_move_via_libcall (dst, src, count_exp, false);
15618 count_exp = const0_rtx;
15624 rtx hot_label = gen_label_rtx ();
15625 jump_around_label = gen_label_rtx ();
15626 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15627 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15628 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15629 emit_block_move_via_libcall (dst, src, count_exp, false);
15630 emit_jump (jump_around_label);
15631 emit_label (hot_label);
15635 /* Step 2: Alignment prologue. */
15637 if (desired_align > align)
15639 /* Except for the first move in epilogue, we no longer know
15640 constant offset in aliasing info. It don't seems to worth
15641 the pain to maintain it for the first move, so throw away
15643 src = change_address (src, BLKmode, srcreg);
15644 dst = change_address (dst, BLKmode, destreg);
15645 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15648 if (label && size_needed == 1)
15650 emit_label (label);
15651 LABEL_NUSES (label) = 1;
15655 /* Step 3: Main loop. */
15661 gcc_unreachable ();
15663 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15664 count_exp, QImode, 1, expected_size);
15667 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15668 count_exp, Pmode, 1, expected_size);
15670 case unrolled_loop:
15671 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15672 registers for 4 temporaries anyway. */
15673 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15674 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15677 case rep_prefix_8_byte:
15678 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15681 case rep_prefix_4_byte:
15682 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15685 case rep_prefix_1_byte:
15686 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15690 /* Adjust properly the offset of src and dest memory for aliasing. */
15691 if (CONST_INT_P (count_exp))
15693 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15694 (count / size_needed) * size_needed);
15695 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15696 (count / size_needed) * size_needed);
15700 src = change_address (src, BLKmode, srcreg);
15701 dst = change_address (dst, BLKmode, destreg);
15704 /* Step 4: Epilogue to copy the remaining bytes. */
15708 /* When the main loop is done, COUNT_EXP might hold original count,
15709 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15710 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15711 bytes. Compensate if needed. */
15713 if (size_needed < epilogue_size_needed)
15716 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15717 GEN_INT (size_needed - 1), count_exp, 1,
15719 if (tmp != count_exp)
15720 emit_move_insn (count_exp, tmp);
15722 emit_label (label);
15723 LABEL_NUSES (label) = 1;
15726 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15727 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15728 epilogue_size_needed);
15729 if (jump_around_label)
15730 emit_label (jump_around_label);
15734 /* Helper function for memcpy. For QImode value 0xXY produce
15735 0xXYXYXYXY of wide specified by MODE. This is essentially
15736 a * 0x10101010, but we can do slightly better than
15737 synth_mult by unwinding the sequence by hand on CPUs with
15740 promote_duplicated_reg (enum machine_mode mode, rtx val)
15742 enum machine_mode valmode = GET_MODE (val);
15744 int nops = mode == DImode ? 3 : 2;
15746 gcc_assert (mode == SImode || mode == DImode);
15747 if (val == const0_rtx)
15748 return copy_to_mode_reg (mode, const0_rtx);
15749 if (CONST_INT_P (val))
15751 HOST_WIDE_INT v = INTVAL (val) & 255;
15755 if (mode == DImode)
15756 v |= (v << 16) << 16;
15757 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15760 if (valmode == VOIDmode)
15762 if (valmode != QImode)
15763 val = gen_lowpart (QImode, val);
15764 if (mode == QImode)
15766 if (!TARGET_PARTIAL_REG_STALL)
15768 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15769 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15770 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15771 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15773 rtx reg = convert_modes (mode, QImode, val, true);
15774 tmp = promote_duplicated_reg (mode, const1_rtx);
15775 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15780 rtx reg = convert_modes (mode, QImode, val, true);
15782 if (!TARGET_PARTIAL_REG_STALL)
15783 if (mode == SImode)
15784 emit_insn (gen_movsi_insv_1 (reg, reg));
15786 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15789 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15790 NULL, 1, OPTAB_DIRECT);
15792 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15794 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15795 NULL, 1, OPTAB_DIRECT);
15796 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15797 if (mode == SImode)
15799 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15800 NULL, 1, OPTAB_DIRECT);
15801 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15806 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15807 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15808 alignment from ALIGN to DESIRED_ALIGN. */
15810 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15815 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15816 promoted_val = promote_duplicated_reg (DImode, val);
15817 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15818 promoted_val = promote_duplicated_reg (SImode, val);
15819 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15820 promoted_val = promote_duplicated_reg (HImode, val);
15822 promoted_val = val;
15824 return promoted_val;
15827 /* Expand string clear operation (bzero). Use i386 string operations when
15828 profitable. See expand_movmem comment for explanation of individual
15829 steps performed. */
15831 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15832 rtx expected_align_exp, rtx expected_size_exp)
15837 rtx jump_around_label = NULL;
15838 HOST_WIDE_INT align = 1;
15839 unsigned HOST_WIDE_INT count = 0;
15840 HOST_WIDE_INT expected_size = -1;
15841 int size_needed = 0, epilogue_size_needed;
15842 int desired_align = 0;
15843 enum stringop_alg alg;
15844 rtx promoted_val = NULL;
15845 bool force_loopy_epilogue = false;
15848 if (CONST_INT_P (align_exp))
15849 align = INTVAL (align_exp);
15850 /* i386 can do misaligned access on reasonably increased cost. */
15851 if (CONST_INT_P (expected_align_exp)
15852 && INTVAL (expected_align_exp) > align)
15853 align = INTVAL (expected_align_exp);
15854 if (CONST_INT_P (count_exp))
15855 count = expected_size = INTVAL (count_exp);
15856 if (CONST_INT_P (expected_size_exp) && count == 0)
15857 expected_size = INTVAL (expected_size_exp);
15859 /* Make sure we don't need to care about overflow later on. */
15860 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15863 /* Step 0: Decide on preferred algorithm, desired alignment and
15864 size of chunks to be copied by main loop. */
15866 alg = decide_alg (count, expected_size, true, &dynamic_check);
15867 desired_align = decide_alignment (align, alg, expected_size);
15869 if (!TARGET_ALIGN_STRINGOPS)
15870 align = desired_align;
15872 if (alg == libcall)
15874 gcc_assert (alg != no_stringop);
15876 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15877 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15882 gcc_unreachable ();
15884 size_needed = GET_MODE_SIZE (Pmode);
15886 case unrolled_loop:
15887 size_needed = GET_MODE_SIZE (Pmode) * 4;
15889 case rep_prefix_8_byte:
15892 case rep_prefix_4_byte:
15895 case rep_prefix_1_byte:
15900 epilogue_size_needed = size_needed;
15902 /* Step 1: Prologue guard. */
15904 /* Alignment code needs count to be in register. */
15905 if (CONST_INT_P (count_exp) && desired_align > align)
15907 enum machine_mode mode = SImode;
15908 if (TARGET_64BIT && (count & ~0xffffffff))
15910 count_exp = force_reg (mode, count_exp);
15912 /* Do the cheap promotion to allow better CSE across the
15913 main loop and epilogue (ie one load of the big constant in the
15914 front of all code. */
15915 if (CONST_INT_P (val_exp))
15916 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15917 desired_align, align);
15918 /* Ensure that alignment prologue won't copy past end of block. */
15919 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15921 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15922 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15923 Make sure it is power of 2. */
15924 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15926 /* To improve performance of small blocks, we jump around the VAL
15927 promoting mode. This mean that if the promoted VAL is not constant,
15928 we might not use it in the epilogue and have to use byte
15930 if (epilogue_size_needed > 2 && !promoted_val)
15931 force_loopy_epilogue = true;
15932 label = gen_label_rtx ();
15933 emit_cmp_and_jump_insns (count_exp,
15934 GEN_INT (epilogue_size_needed),
15935 LTU, 0, counter_mode (count_exp), 1, label);
15936 if (GET_CODE (count_exp) == CONST_INT)
15938 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15939 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15941 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15943 if (dynamic_check != -1)
15945 rtx hot_label = gen_label_rtx ();
15946 jump_around_label = gen_label_rtx ();
15947 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15948 LEU, 0, counter_mode (count_exp), 1, hot_label);
15949 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15950 set_storage_via_libcall (dst, count_exp, val_exp, false);
15951 emit_jump (jump_around_label);
15952 emit_label (hot_label);
15955 /* Step 2: Alignment prologue. */
15957 /* Do the expensive promotion once we branched off the small blocks. */
15959 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15960 desired_align, align);
15961 gcc_assert (desired_align >= 1 && align >= 1);
15963 if (desired_align > align)
15965 /* Except for the first move in epilogue, we no longer know
15966 constant offset in aliasing info. It don't seems to worth
15967 the pain to maintain it for the first move, so throw away
15969 dst = change_address (dst, BLKmode, destreg);
15970 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15973 if (label && size_needed == 1)
15975 emit_label (label);
15976 LABEL_NUSES (label) = 1;
15980 /* Step 3: Main loop. */
15986 gcc_unreachable ();
15988 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15989 count_exp, QImode, 1, expected_size);
15992 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15993 count_exp, Pmode, 1, expected_size);
15995 case unrolled_loop:
15996 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15997 count_exp, Pmode, 4, expected_size);
15999 case rep_prefix_8_byte:
16000 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16003 case rep_prefix_4_byte:
16004 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16007 case rep_prefix_1_byte:
16008 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16012 /* Adjust properly the offset of src and dest memory for aliasing. */
16013 if (CONST_INT_P (count_exp))
16014 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16015 (count / size_needed) * size_needed);
16017 dst = change_address (dst, BLKmode, destreg);
16019 /* Step 4: Epilogue to copy the remaining bytes. */
16023 /* When the main loop is done, COUNT_EXP might hold original count,
16024 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16025 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16026 bytes. Compensate if needed. */
16028 if (size_needed < desired_align - align)
16031 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16032 GEN_INT (size_needed - 1), count_exp, 1,
16034 size_needed = desired_align - align + 1;
16035 if (tmp != count_exp)
16036 emit_move_insn (count_exp, tmp);
16038 emit_label (label);
16039 LABEL_NUSES (label) = 1;
16041 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16043 if (force_loopy_epilogue)
16044 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16047 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16050 if (jump_around_label)
16051 emit_label (jump_around_label);
16055 /* Expand the appropriate insns for doing strlen if not just doing
16058 out = result, initialized with the start address
16059 align_rtx = alignment of the address.
16060 scratch = scratch register, initialized with the startaddress when
16061 not aligned, otherwise undefined
16063 This is just the body. It needs the initializations mentioned above and
16064 some address computing at the end. These things are done in i386.md. */
16067 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16071 rtx align_2_label = NULL_RTX;
16072 rtx align_3_label = NULL_RTX;
16073 rtx align_4_label = gen_label_rtx ();
16074 rtx end_0_label = gen_label_rtx ();
16076 rtx tmpreg = gen_reg_rtx (SImode);
16077 rtx scratch = gen_reg_rtx (SImode);
16081 if (CONST_INT_P (align_rtx))
16082 align = INTVAL (align_rtx);
16084 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16086 /* Is there a known alignment and is it less than 4? */
16089 rtx scratch1 = gen_reg_rtx (Pmode);
16090 emit_move_insn (scratch1, out);
16091 /* Is there a known alignment and is it not 2? */
16094 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16095 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16097 /* Leave just the 3 lower bits. */
16098 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16099 NULL_RTX, 0, OPTAB_WIDEN);
16101 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16102 Pmode, 1, align_4_label);
16103 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16104 Pmode, 1, align_2_label);
16105 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16106 Pmode, 1, align_3_label);
16110 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16111 check if is aligned to 4 - byte. */
16113 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16114 NULL_RTX, 0, OPTAB_WIDEN);
16116 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16117 Pmode, 1, align_4_label);
16120 mem = change_address (src, QImode, out);
16122 /* Now compare the bytes. */
16124 /* Compare the first n unaligned byte on a byte per byte basis. */
16125 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16126 QImode, 1, end_0_label);
16128 /* Increment the address. */
16130 emit_insn (gen_adddi3 (out, out, const1_rtx));
16132 emit_insn (gen_addsi3 (out, out, const1_rtx));
16134 /* Not needed with an alignment of 2 */
16137 emit_label (align_2_label);
16139 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16143 emit_insn (gen_adddi3 (out, out, const1_rtx));
16145 emit_insn (gen_addsi3 (out, out, const1_rtx));
16147 emit_label (align_3_label);
16150 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16154 emit_insn (gen_adddi3 (out, out, const1_rtx));
16156 emit_insn (gen_addsi3 (out, out, const1_rtx));
16159 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16160 align this loop. It gives only huge programs, but does not help to
16162 emit_label (align_4_label);
16164 mem = change_address (src, SImode, out);
16165 emit_move_insn (scratch, mem);
16167 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16169 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16171 /* This formula yields a nonzero result iff one of the bytes is zero.
16172 This saves three branches inside loop and many cycles. */
16174 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16175 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16176 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16177 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16178 gen_int_mode (0x80808080, SImode)));
16179 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16184 rtx reg = gen_reg_rtx (SImode);
16185 rtx reg2 = gen_reg_rtx (Pmode);
16186 emit_move_insn (reg, tmpreg);
16187 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16189 /* If zero is not in the first two bytes, move two bytes forward. */
16190 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16191 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16192 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16193 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16194 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16197 /* Emit lea manually to avoid clobbering of flags. */
16198 emit_insn (gen_rtx_SET (SImode, reg2,
16199 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16201 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16202 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16203 emit_insn (gen_rtx_SET (VOIDmode, out,
16204 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16211 rtx end_2_label = gen_label_rtx ();
16212 /* Is zero in the first two bytes? */
16214 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16215 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16216 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16217 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16218 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16220 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16221 JUMP_LABEL (tmp) = end_2_label;
16223 /* Not in the first two. Move two bytes forward. */
16224 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16226 emit_insn (gen_adddi3 (out, out, const2_rtx));
16228 emit_insn (gen_addsi3 (out, out, const2_rtx));
16230 emit_label (end_2_label);
16234 /* Avoid branch in fixing the byte. */
16235 tmpreg = gen_lowpart (QImode, tmpreg);
16236 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16237 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16239 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16241 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16243 emit_label (end_0_label);
16246 /* Expand strlen. */
16249 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16251 rtx addr, scratch1, scratch2, scratch3, scratch4;
16253 /* The generic case of strlen expander is long. Avoid it's
16254 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16256 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16257 && !TARGET_INLINE_ALL_STRINGOPS
16259 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16262 addr = force_reg (Pmode, XEXP (src, 0));
16263 scratch1 = gen_reg_rtx (Pmode);
16265 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16268 /* Well it seems that some optimizer does not combine a call like
16269 foo(strlen(bar), strlen(bar));
16270 when the move and the subtraction is done here. It does calculate
16271 the length just once when these instructions are done inside of
16272 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16273 often used and I use one fewer register for the lifetime of
16274 output_strlen_unroll() this is better. */
16276 emit_move_insn (out, addr);
16278 ix86_expand_strlensi_unroll_1 (out, src, align);
16280 /* strlensi_unroll_1 returns the address of the zero at the end of
16281 the string, like memchr(), so compute the length by subtracting
16282 the start address. */
16284 emit_insn (gen_subdi3 (out, out, addr));
16286 emit_insn (gen_subsi3 (out, out, addr));
16292 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16293 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16296 scratch2 = gen_reg_rtx (Pmode);
16297 scratch3 = gen_reg_rtx (Pmode);
16298 scratch4 = force_reg (Pmode, constm1_rtx);
16300 emit_move_insn (scratch3, addr);
16301 eoschar = force_reg (QImode, eoschar);
16303 src = replace_equiv_address_nv (src, scratch3);
16305 /* If .md starts supporting :P, this can be done in .md. */
16306 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16307 scratch4), UNSPEC_SCAS);
16308 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16311 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16312 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16316 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16317 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16323 /* For given symbol (function) construct code to compute address of it's PLT
16324 entry in large x86-64 PIC model. */
16326 construct_plt_address (rtx symbol)
16328 rtx tmp = gen_reg_rtx (Pmode);
16329 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16331 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16332 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16334 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16335 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16340 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16341 rtx callarg2 ATTRIBUTE_UNUSED,
16342 rtx pop, int sibcall)
16344 rtx use = NULL, call;
16346 if (pop == const0_rtx)
16348 gcc_assert (!TARGET_64BIT || !pop);
16350 if (TARGET_MACHO && !TARGET_64BIT)
16353 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16354 fnaddr = machopic_indirect_call_target (fnaddr);
16359 /* Static functions and indirect calls don't need the pic register. */
16360 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16361 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16362 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16363 use_reg (&use, pic_offset_table_rtx);
16366 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16368 rtx al = gen_rtx_REG (QImode, AX_REG);
16369 emit_move_insn (al, callarg2);
16370 use_reg (&use, al);
16373 if (ix86_cmodel == CM_LARGE_PIC
16374 && GET_CODE (fnaddr) == MEM
16375 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16376 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16377 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16378 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16380 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16381 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16383 if (sibcall && TARGET_64BIT
16384 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16387 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16388 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16389 emit_move_insn (fnaddr, addr);
16390 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16393 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16395 call = gen_rtx_SET (VOIDmode, retval, call);
16398 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16399 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16400 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16403 call = emit_call_insn (call);
16405 CALL_INSN_FUNCTION_USAGE (call) = use;
16409 /* Clear stack slot assignments remembered from previous functions.
16410 This is called from INIT_EXPANDERS once before RTL is emitted for each
16413 static struct machine_function *
16414 ix86_init_machine_status (void)
16416 struct machine_function *f;
16418 f = GGC_CNEW (struct machine_function);
16419 f->use_fast_prologue_epilogue_nregs = -1;
16420 f->tls_descriptor_call_expanded_p = 0;
16425 /* Return a MEM corresponding to a stack slot with mode MODE.
16426 Allocate a new slot if necessary.
16428 The RTL for a function can have several slots available: N is
16429 which slot to use. */
16432 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16434 struct stack_local_entry *s;
16436 gcc_assert (n < MAX_386_STACK_LOCALS);
16438 /* Virtual slot is valid only before vregs are instantiated. */
16439 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16441 for (s = ix86_stack_locals; s; s = s->next)
16442 if (s->mode == mode && s->n == n)
16443 return copy_rtx (s->rtl);
16445 s = (struct stack_local_entry *)
16446 ggc_alloc (sizeof (struct stack_local_entry));
16449 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16451 s->next = ix86_stack_locals;
16452 ix86_stack_locals = s;
16456 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16458 static GTY(()) rtx ix86_tls_symbol;
16460 ix86_tls_get_addr (void)
16463 if (!ix86_tls_symbol)
16465 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16466 (TARGET_ANY_GNU_TLS
16468 ? "___tls_get_addr"
16469 : "__tls_get_addr");
16472 return ix86_tls_symbol;
16475 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16477 static GTY(()) rtx ix86_tls_module_base_symbol;
16479 ix86_tls_module_base (void)
16482 if (!ix86_tls_module_base_symbol)
16484 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16485 "_TLS_MODULE_BASE_");
16486 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16487 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16490 return ix86_tls_module_base_symbol;
16493 /* Calculate the length of the memory address in the instruction
16494 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16497 memory_address_length (rtx addr)
16499 struct ix86_address parts;
16500 rtx base, index, disp;
16504 if (GET_CODE (addr) == PRE_DEC
16505 || GET_CODE (addr) == POST_INC
16506 || GET_CODE (addr) == PRE_MODIFY
16507 || GET_CODE (addr) == POST_MODIFY)
16510 ok = ix86_decompose_address (addr, &parts);
16513 if (parts.base && GET_CODE (parts.base) == SUBREG)
16514 parts.base = SUBREG_REG (parts.base);
16515 if (parts.index && GET_CODE (parts.index) == SUBREG)
16516 parts.index = SUBREG_REG (parts.index);
16519 index = parts.index;
16524 - esp as the base always wants an index,
16525 - ebp as the base always wants a displacement. */
16527 /* Register Indirect. */
16528 if (base && !index && !disp)
16530 /* esp (for its index) and ebp (for its displacement) need
16531 the two-byte modrm form. */
16532 if (addr == stack_pointer_rtx
16533 || addr == arg_pointer_rtx
16534 || addr == frame_pointer_rtx
16535 || addr == hard_frame_pointer_rtx)
16539 /* Direct Addressing. */
16540 else if (disp && !base && !index)
16545 /* Find the length of the displacement constant. */
16548 if (base && satisfies_constraint_K (disp))
16553 /* ebp always wants a displacement. */
16554 else if (base == hard_frame_pointer_rtx)
16557 /* An index requires the two-byte modrm form.... */
16559 /* ...like esp, which always wants an index. */
16560 || base == stack_pointer_rtx
16561 || base == arg_pointer_rtx
16562 || base == frame_pointer_rtx)
16569 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16570 is set, expect that insn have 8bit immediate alternative. */
16572 ix86_attr_length_immediate_default (rtx insn, int shortform)
16576 extract_insn_cached (insn);
16577 for (i = recog_data.n_operands - 1; i >= 0; --i)
16578 if (CONSTANT_P (recog_data.operand[i]))
16581 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16585 switch (get_attr_mode (insn))
16596 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16601 fatal_insn ("unknown insn mode", insn);
16607 /* Compute default value for "length_address" attribute. */
16609 ix86_attr_length_address_default (rtx insn)
16613 if (get_attr_type (insn) == TYPE_LEA)
16615 rtx set = PATTERN (insn);
16617 if (GET_CODE (set) == PARALLEL)
16618 set = XVECEXP (set, 0, 0);
16620 gcc_assert (GET_CODE (set) == SET);
16622 return memory_address_length (SET_SRC (set));
16625 extract_insn_cached (insn);
16626 for (i = recog_data.n_operands - 1; i >= 0; --i)
16627 if (MEM_P (recog_data.operand[i]))
16629 return memory_address_length (XEXP (recog_data.operand[i], 0));
16635 /* Return the maximum number of instructions a cpu can issue. */
16638 ix86_issue_rate (void)
16642 case PROCESSOR_PENTIUM:
16646 case PROCESSOR_PENTIUMPRO:
16647 case PROCESSOR_PENTIUM4:
16648 case PROCESSOR_ATHLON:
16650 case PROCESSOR_AMDFAM10:
16651 case PROCESSOR_NOCONA:
16652 case PROCESSOR_GENERIC32:
16653 case PROCESSOR_GENERIC64:
16656 case PROCESSOR_CORE2:
16664 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16665 by DEP_INSN and nothing set by DEP_INSN. */
16668 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16672 /* Simplify the test for uninteresting insns. */
16673 if (insn_type != TYPE_SETCC
16674 && insn_type != TYPE_ICMOV
16675 && insn_type != TYPE_FCMOV
16676 && insn_type != TYPE_IBR)
16679 if ((set = single_set (dep_insn)) != 0)
16681 set = SET_DEST (set);
16684 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16685 && XVECLEN (PATTERN (dep_insn), 0) == 2
16686 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16687 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16689 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16690 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16695 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16698 /* This test is true if the dependent insn reads the flags but
16699 not any other potentially set register. */
16700 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16703 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16709 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16710 address with operands set by DEP_INSN. */
16713 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16717 if (insn_type == TYPE_LEA
16720 addr = PATTERN (insn);
16722 if (GET_CODE (addr) == PARALLEL)
16723 addr = XVECEXP (addr, 0, 0);
16725 gcc_assert (GET_CODE (addr) == SET);
16727 addr = SET_SRC (addr);
16732 extract_insn_cached (insn);
16733 for (i = recog_data.n_operands - 1; i >= 0; --i)
16734 if (MEM_P (recog_data.operand[i]))
16736 addr = XEXP (recog_data.operand[i], 0);
16743 return modified_in_p (addr, dep_insn);
16747 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16749 enum attr_type insn_type, dep_insn_type;
16750 enum attr_memory memory;
16752 int dep_insn_code_number;
16754 /* Anti and output dependencies have zero cost on all CPUs. */
16755 if (REG_NOTE_KIND (link) != 0)
16758 dep_insn_code_number = recog_memoized (dep_insn);
16760 /* If we can't recognize the insns, we can't really do anything. */
16761 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16764 insn_type = get_attr_type (insn);
16765 dep_insn_type = get_attr_type (dep_insn);
16769 case PROCESSOR_PENTIUM:
16770 /* Address Generation Interlock adds a cycle of latency. */
16771 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16774 /* ??? Compares pair with jump/setcc. */
16775 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16778 /* Floating point stores require value to be ready one cycle earlier. */
16779 if (insn_type == TYPE_FMOV
16780 && get_attr_memory (insn) == MEMORY_STORE
16781 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16785 case PROCESSOR_PENTIUMPRO:
16786 memory = get_attr_memory (insn);
16788 /* INT->FP conversion is expensive. */
16789 if (get_attr_fp_int_src (dep_insn))
16792 /* There is one cycle extra latency between an FP op and a store. */
16793 if (insn_type == TYPE_FMOV
16794 && (set = single_set (dep_insn)) != NULL_RTX
16795 && (set2 = single_set (insn)) != NULL_RTX
16796 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16797 && MEM_P (SET_DEST (set2)))
16800 /* Show ability of reorder buffer to hide latency of load by executing
16801 in parallel with previous instruction in case
16802 previous instruction is not needed to compute the address. */
16803 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16804 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16806 /* Claim moves to take one cycle, as core can issue one load
16807 at time and the next load can start cycle later. */
16808 if (dep_insn_type == TYPE_IMOV
16809 || dep_insn_type == TYPE_FMOV)
16817 memory = get_attr_memory (insn);
16819 /* The esp dependency is resolved before the instruction is really
16821 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16822 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16825 /* INT->FP conversion is expensive. */
16826 if (get_attr_fp_int_src (dep_insn))
16829 /* Show ability of reorder buffer to hide latency of load by executing
16830 in parallel with previous instruction in case
16831 previous instruction is not needed to compute the address. */
16832 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16833 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16835 /* Claim moves to take one cycle, as core can issue one load
16836 at time and the next load can start cycle later. */
16837 if (dep_insn_type == TYPE_IMOV
16838 || dep_insn_type == TYPE_FMOV)
16847 case PROCESSOR_ATHLON:
16849 case PROCESSOR_AMDFAM10:
16850 case PROCESSOR_GENERIC32:
16851 case PROCESSOR_GENERIC64:
16852 memory = get_attr_memory (insn);
16854 /* Show ability of reorder buffer to hide latency of load by executing
16855 in parallel with previous instruction in case
16856 previous instruction is not needed to compute the address. */
16857 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16858 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16860 enum attr_unit unit = get_attr_unit (insn);
16863 /* Because of the difference between the length of integer and
16864 floating unit pipeline preparation stages, the memory operands
16865 for floating point are cheaper.
16867 ??? For Athlon it the difference is most probably 2. */
16868 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16871 loadcost = TARGET_ATHLON ? 2 : 0;
16873 if (cost >= loadcost)
16886 /* How many alternative schedules to try. This should be as wide as the
16887 scheduling freedom in the DFA, but no wider. Making this value too
16888 large results extra work for the scheduler. */
16891 ia32_multipass_dfa_lookahead (void)
16895 case PROCESSOR_PENTIUM:
16898 case PROCESSOR_PENTIUMPRO:
16908 /* Compute the alignment given to a constant that is being placed in memory.
16909 EXP is the constant and ALIGN is the alignment that the object would
16911 The value of this function is used instead of that alignment to align
16915 ix86_constant_alignment (tree exp, int align)
16917 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16918 || TREE_CODE (exp) == INTEGER_CST)
16920 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16922 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16925 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16926 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16927 return BITS_PER_WORD;
16932 /* Compute the alignment for a static variable.
16933 TYPE is the data type, and ALIGN is the alignment that
16934 the object would ordinarily have. The value of this function is used
16935 instead of that alignment to align the object. */
16938 ix86_data_alignment (tree type, int align)
16940 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16942 if (AGGREGATE_TYPE_P (type)
16943 && TYPE_SIZE (type)
16944 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16945 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16946 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16947 && align < max_align)
16950 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16951 to 16byte boundary. */
16954 if (AGGREGATE_TYPE_P (type)
16955 && TYPE_SIZE (type)
16956 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16957 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16958 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16962 if (TREE_CODE (type) == ARRAY_TYPE)
16964 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16966 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16969 else if (TREE_CODE (type) == COMPLEX_TYPE)
16972 if (TYPE_MODE (type) == DCmode && align < 64)
16974 if (TYPE_MODE (type) == XCmode && align < 128)
16977 else if ((TREE_CODE (type) == RECORD_TYPE
16978 || TREE_CODE (type) == UNION_TYPE
16979 || TREE_CODE (type) == QUAL_UNION_TYPE)
16980 && TYPE_FIELDS (type))
16982 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16984 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16987 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16988 || TREE_CODE (type) == INTEGER_TYPE)
16990 if (TYPE_MODE (type) == DFmode && align < 64)
16992 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16999 /* Compute the alignment for a local variable.
17000 TYPE is the data type, and ALIGN is the alignment that
17001 the object would ordinarily have. The value of this macro is used
17002 instead of that alignment to align the object. */
17005 ix86_local_alignment (tree type, int align)
17007 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17008 to 16byte boundary. */
17011 if (AGGREGATE_TYPE_P (type)
17012 && TYPE_SIZE (type)
17013 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17014 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17015 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17018 if (TREE_CODE (type) == ARRAY_TYPE)
17020 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17022 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17025 else if (TREE_CODE (type) == COMPLEX_TYPE)
17027 if (TYPE_MODE (type) == DCmode && align < 64)
17029 if (TYPE_MODE (type) == XCmode && align < 128)
17032 else if ((TREE_CODE (type) == RECORD_TYPE
17033 || TREE_CODE (type) == UNION_TYPE
17034 || TREE_CODE (type) == QUAL_UNION_TYPE)
17035 && TYPE_FIELDS (type))
17037 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17039 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17042 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17043 || TREE_CODE (type) == INTEGER_TYPE)
17046 if (TYPE_MODE (type) == DFmode && align < 64)
17048 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17054 /* Emit RTL insns to initialize the variable parts of a trampoline.
17055 FNADDR is an RTX for the address of the function's pure code.
17056 CXT is an RTX for the static chain value for the function. */
17058 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17062 /* Compute offset from the end of the jmp to the target function. */
17063 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17064 plus_constant (tramp, 10),
17065 NULL_RTX, 1, OPTAB_DIRECT);
17066 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17067 gen_int_mode (0xb9, QImode));
17068 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17069 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17070 gen_int_mode (0xe9, QImode));
17071 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17076 /* Try to load address using shorter movl instead of movabs.
17077 We may want to support movq for kernel mode, but kernel does not use
17078 trampolines at the moment. */
17079 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17081 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17082 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17083 gen_int_mode (0xbb41, HImode));
17084 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17085 gen_lowpart (SImode, fnaddr));
17090 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17091 gen_int_mode (0xbb49, HImode));
17092 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17096 /* Load static chain using movabs to r10. */
17097 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17098 gen_int_mode (0xba49, HImode));
17099 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17102 /* Jump to the r11 */
17103 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17104 gen_int_mode (0xff49, HImode));
17105 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17106 gen_int_mode (0xe3, QImode));
17108 gcc_assert (offset <= TRAMPOLINE_SIZE);
17111 #ifdef ENABLE_EXECUTE_STACK
17112 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17113 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17117 /* Codes for all the SSE/MMX builtins. */
17120 IX86_BUILTIN_ADDPS,
17121 IX86_BUILTIN_ADDSS,
17122 IX86_BUILTIN_DIVPS,
17123 IX86_BUILTIN_DIVSS,
17124 IX86_BUILTIN_MULPS,
17125 IX86_BUILTIN_MULSS,
17126 IX86_BUILTIN_SUBPS,
17127 IX86_BUILTIN_SUBSS,
17129 IX86_BUILTIN_CMPEQPS,
17130 IX86_BUILTIN_CMPLTPS,
17131 IX86_BUILTIN_CMPLEPS,
17132 IX86_BUILTIN_CMPGTPS,
17133 IX86_BUILTIN_CMPGEPS,
17134 IX86_BUILTIN_CMPNEQPS,
17135 IX86_BUILTIN_CMPNLTPS,
17136 IX86_BUILTIN_CMPNLEPS,
17137 IX86_BUILTIN_CMPNGTPS,
17138 IX86_BUILTIN_CMPNGEPS,
17139 IX86_BUILTIN_CMPORDPS,
17140 IX86_BUILTIN_CMPUNORDPS,
17141 IX86_BUILTIN_CMPEQSS,
17142 IX86_BUILTIN_CMPLTSS,
17143 IX86_BUILTIN_CMPLESS,
17144 IX86_BUILTIN_CMPNEQSS,
17145 IX86_BUILTIN_CMPNLTSS,
17146 IX86_BUILTIN_CMPNLESS,
17147 IX86_BUILTIN_CMPNGTSS,
17148 IX86_BUILTIN_CMPNGESS,
17149 IX86_BUILTIN_CMPORDSS,
17150 IX86_BUILTIN_CMPUNORDSS,
17152 IX86_BUILTIN_COMIEQSS,
17153 IX86_BUILTIN_COMILTSS,
17154 IX86_BUILTIN_COMILESS,
17155 IX86_BUILTIN_COMIGTSS,
17156 IX86_BUILTIN_COMIGESS,
17157 IX86_BUILTIN_COMINEQSS,
17158 IX86_BUILTIN_UCOMIEQSS,
17159 IX86_BUILTIN_UCOMILTSS,
17160 IX86_BUILTIN_UCOMILESS,
17161 IX86_BUILTIN_UCOMIGTSS,
17162 IX86_BUILTIN_UCOMIGESS,
17163 IX86_BUILTIN_UCOMINEQSS,
17165 IX86_BUILTIN_CVTPI2PS,
17166 IX86_BUILTIN_CVTPS2PI,
17167 IX86_BUILTIN_CVTSI2SS,
17168 IX86_BUILTIN_CVTSI642SS,
17169 IX86_BUILTIN_CVTSS2SI,
17170 IX86_BUILTIN_CVTSS2SI64,
17171 IX86_BUILTIN_CVTTPS2PI,
17172 IX86_BUILTIN_CVTTSS2SI,
17173 IX86_BUILTIN_CVTTSS2SI64,
17175 IX86_BUILTIN_MAXPS,
17176 IX86_BUILTIN_MAXSS,
17177 IX86_BUILTIN_MINPS,
17178 IX86_BUILTIN_MINSS,
17180 IX86_BUILTIN_LOADUPS,
17181 IX86_BUILTIN_STOREUPS,
17182 IX86_BUILTIN_MOVSS,
17184 IX86_BUILTIN_MOVHLPS,
17185 IX86_BUILTIN_MOVLHPS,
17186 IX86_BUILTIN_LOADHPS,
17187 IX86_BUILTIN_LOADLPS,
17188 IX86_BUILTIN_STOREHPS,
17189 IX86_BUILTIN_STORELPS,
17191 IX86_BUILTIN_MASKMOVQ,
17192 IX86_BUILTIN_MOVMSKPS,
17193 IX86_BUILTIN_PMOVMSKB,
17195 IX86_BUILTIN_MOVNTPS,
17196 IX86_BUILTIN_MOVNTQ,
17198 IX86_BUILTIN_LOADDQU,
17199 IX86_BUILTIN_STOREDQU,
17201 IX86_BUILTIN_PACKSSWB,
17202 IX86_BUILTIN_PACKSSDW,
17203 IX86_BUILTIN_PACKUSWB,
17205 IX86_BUILTIN_PADDB,
17206 IX86_BUILTIN_PADDW,
17207 IX86_BUILTIN_PADDD,
17208 IX86_BUILTIN_PADDQ,
17209 IX86_BUILTIN_PADDSB,
17210 IX86_BUILTIN_PADDSW,
17211 IX86_BUILTIN_PADDUSB,
17212 IX86_BUILTIN_PADDUSW,
17213 IX86_BUILTIN_PSUBB,
17214 IX86_BUILTIN_PSUBW,
17215 IX86_BUILTIN_PSUBD,
17216 IX86_BUILTIN_PSUBQ,
17217 IX86_BUILTIN_PSUBSB,
17218 IX86_BUILTIN_PSUBSW,
17219 IX86_BUILTIN_PSUBUSB,
17220 IX86_BUILTIN_PSUBUSW,
17223 IX86_BUILTIN_PANDN,
17227 IX86_BUILTIN_PAVGB,
17228 IX86_BUILTIN_PAVGW,
17230 IX86_BUILTIN_PCMPEQB,
17231 IX86_BUILTIN_PCMPEQW,
17232 IX86_BUILTIN_PCMPEQD,
17233 IX86_BUILTIN_PCMPGTB,
17234 IX86_BUILTIN_PCMPGTW,
17235 IX86_BUILTIN_PCMPGTD,
17237 IX86_BUILTIN_PMADDWD,
17239 IX86_BUILTIN_PMAXSW,
17240 IX86_BUILTIN_PMAXUB,
17241 IX86_BUILTIN_PMINSW,
17242 IX86_BUILTIN_PMINUB,
17244 IX86_BUILTIN_PMULHUW,
17245 IX86_BUILTIN_PMULHW,
17246 IX86_BUILTIN_PMULLW,
17248 IX86_BUILTIN_PSADBW,
17249 IX86_BUILTIN_PSHUFW,
17251 IX86_BUILTIN_PSLLW,
17252 IX86_BUILTIN_PSLLD,
17253 IX86_BUILTIN_PSLLQ,
17254 IX86_BUILTIN_PSRAW,
17255 IX86_BUILTIN_PSRAD,
17256 IX86_BUILTIN_PSRLW,
17257 IX86_BUILTIN_PSRLD,
17258 IX86_BUILTIN_PSRLQ,
17259 IX86_BUILTIN_PSLLWI,
17260 IX86_BUILTIN_PSLLDI,
17261 IX86_BUILTIN_PSLLQI,
17262 IX86_BUILTIN_PSRAWI,
17263 IX86_BUILTIN_PSRADI,
17264 IX86_BUILTIN_PSRLWI,
17265 IX86_BUILTIN_PSRLDI,
17266 IX86_BUILTIN_PSRLQI,
17268 IX86_BUILTIN_PUNPCKHBW,
17269 IX86_BUILTIN_PUNPCKHWD,
17270 IX86_BUILTIN_PUNPCKHDQ,
17271 IX86_BUILTIN_PUNPCKLBW,
17272 IX86_BUILTIN_PUNPCKLWD,
17273 IX86_BUILTIN_PUNPCKLDQ,
17275 IX86_BUILTIN_SHUFPS,
17277 IX86_BUILTIN_RCPPS,
17278 IX86_BUILTIN_RCPSS,
17279 IX86_BUILTIN_RSQRTPS,
17280 IX86_BUILTIN_RSQRTPS_NR,
17281 IX86_BUILTIN_RSQRTSS,
17282 IX86_BUILTIN_RSQRTF,
17283 IX86_BUILTIN_SQRTPS,
17284 IX86_BUILTIN_SQRTPS_NR,
17285 IX86_BUILTIN_SQRTSS,
17287 IX86_BUILTIN_UNPCKHPS,
17288 IX86_BUILTIN_UNPCKLPS,
17290 IX86_BUILTIN_ANDPS,
17291 IX86_BUILTIN_ANDNPS,
17293 IX86_BUILTIN_XORPS,
17296 IX86_BUILTIN_LDMXCSR,
17297 IX86_BUILTIN_STMXCSR,
17298 IX86_BUILTIN_SFENCE,
17300 /* 3DNow! Original */
17301 IX86_BUILTIN_FEMMS,
17302 IX86_BUILTIN_PAVGUSB,
17303 IX86_BUILTIN_PF2ID,
17304 IX86_BUILTIN_PFACC,
17305 IX86_BUILTIN_PFADD,
17306 IX86_BUILTIN_PFCMPEQ,
17307 IX86_BUILTIN_PFCMPGE,
17308 IX86_BUILTIN_PFCMPGT,
17309 IX86_BUILTIN_PFMAX,
17310 IX86_BUILTIN_PFMIN,
17311 IX86_BUILTIN_PFMUL,
17312 IX86_BUILTIN_PFRCP,
17313 IX86_BUILTIN_PFRCPIT1,
17314 IX86_BUILTIN_PFRCPIT2,
17315 IX86_BUILTIN_PFRSQIT1,
17316 IX86_BUILTIN_PFRSQRT,
17317 IX86_BUILTIN_PFSUB,
17318 IX86_BUILTIN_PFSUBR,
17319 IX86_BUILTIN_PI2FD,
17320 IX86_BUILTIN_PMULHRW,
17322 /* 3DNow! Athlon Extensions */
17323 IX86_BUILTIN_PF2IW,
17324 IX86_BUILTIN_PFNACC,
17325 IX86_BUILTIN_PFPNACC,
17326 IX86_BUILTIN_PI2FW,
17327 IX86_BUILTIN_PSWAPDSI,
17328 IX86_BUILTIN_PSWAPDSF,
17331 IX86_BUILTIN_ADDPD,
17332 IX86_BUILTIN_ADDSD,
17333 IX86_BUILTIN_DIVPD,
17334 IX86_BUILTIN_DIVSD,
17335 IX86_BUILTIN_MULPD,
17336 IX86_BUILTIN_MULSD,
17337 IX86_BUILTIN_SUBPD,
17338 IX86_BUILTIN_SUBSD,
17340 IX86_BUILTIN_CMPEQPD,
17341 IX86_BUILTIN_CMPLTPD,
17342 IX86_BUILTIN_CMPLEPD,
17343 IX86_BUILTIN_CMPGTPD,
17344 IX86_BUILTIN_CMPGEPD,
17345 IX86_BUILTIN_CMPNEQPD,
17346 IX86_BUILTIN_CMPNLTPD,
17347 IX86_BUILTIN_CMPNLEPD,
17348 IX86_BUILTIN_CMPNGTPD,
17349 IX86_BUILTIN_CMPNGEPD,
17350 IX86_BUILTIN_CMPORDPD,
17351 IX86_BUILTIN_CMPUNORDPD,
17352 IX86_BUILTIN_CMPEQSD,
17353 IX86_BUILTIN_CMPLTSD,
17354 IX86_BUILTIN_CMPLESD,
17355 IX86_BUILTIN_CMPNEQSD,
17356 IX86_BUILTIN_CMPNLTSD,
17357 IX86_BUILTIN_CMPNLESD,
17358 IX86_BUILTIN_CMPORDSD,
17359 IX86_BUILTIN_CMPUNORDSD,
17361 IX86_BUILTIN_COMIEQSD,
17362 IX86_BUILTIN_COMILTSD,
17363 IX86_BUILTIN_COMILESD,
17364 IX86_BUILTIN_COMIGTSD,
17365 IX86_BUILTIN_COMIGESD,
17366 IX86_BUILTIN_COMINEQSD,
17367 IX86_BUILTIN_UCOMIEQSD,
17368 IX86_BUILTIN_UCOMILTSD,
17369 IX86_BUILTIN_UCOMILESD,
17370 IX86_BUILTIN_UCOMIGTSD,
17371 IX86_BUILTIN_UCOMIGESD,
17372 IX86_BUILTIN_UCOMINEQSD,
17374 IX86_BUILTIN_MAXPD,
17375 IX86_BUILTIN_MAXSD,
17376 IX86_BUILTIN_MINPD,
17377 IX86_BUILTIN_MINSD,
17379 IX86_BUILTIN_ANDPD,
17380 IX86_BUILTIN_ANDNPD,
17382 IX86_BUILTIN_XORPD,
17384 IX86_BUILTIN_SQRTPD,
17385 IX86_BUILTIN_SQRTSD,
17387 IX86_BUILTIN_UNPCKHPD,
17388 IX86_BUILTIN_UNPCKLPD,
17390 IX86_BUILTIN_SHUFPD,
17392 IX86_BUILTIN_LOADUPD,
17393 IX86_BUILTIN_STOREUPD,
17394 IX86_BUILTIN_MOVSD,
17396 IX86_BUILTIN_LOADHPD,
17397 IX86_BUILTIN_LOADLPD,
17399 IX86_BUILTIN_CVTDQ2PD,
17400 IX86_BUILTIN_CVTDQ2PS,
17402 IX86_BUILTIN_CVTPD2DQ,
17403 IX86_BUILTIN_CVTPD2PI,
17404 IX86_BUILTIN_CVTPD2PS,
17405 IX86_BUILTIN_CVTTPD2DQ,
17406 IX86_BUILTIN_CVTTPD2PI,
17408 IX86_BUILTIN_CVTPI2PD,
17409 IX86_BUILTIN_CVTSI2SD,
17410 IX86_BUILTIN_CVTSI642SD,
17412 IX86_BUILTIN_CVTSD2SI,
17413 IX86_BUILTIN_CVTSD2SI64,
17414 IX86_BUILTIN_CVTSD2SS,
17415 IX86_BUILTIN_CVTSS2SD,
17416 IX86_BUILTIN_CVTTSD2SI,
17417 IX86_BUILTIN_CVTTSD2SI64,
17419 IX86_BUILTIN_CVTPS2DQ,
17420 IX86_BUILTIN_CVTPS2PD,
17421 IX86_BUILTIN_CVTTPS2DQ,
17423 IX86_BUILTIN_MOVNTI,
17424 IX86_BUILTIN_MOVNTPD,
17425 IX86_BUILTIN_MOVNTDQ,
17428 IX86_BUILTIN_MASKMOVDQU,
17429 IX86_BUILTIN_MOVMSKPD,
17430 IX86_BUILTIN_PMOVMSKB128,
17432 IX86_BUILTIN_PACKSSWB128,
17433 IX86_BUILTIN_PACKSSDW128,
17434 IX86_BUILTIN_PACKUSWB128,
17436 IX86_BUILTIN_PADDB128,
17437 IX86_BUILTIN_PADDW128,
17438 IX86_BUILTIN_PADDD128,
17439 IX86_BUILTIN_PADDQ128,
17440 IX86_BUILTIN_PADDSB128,
17441 IX86_BUILTIN_PADDSW128,
17442 IX86_BUILTIN_PADDUSB128,
17443 IX86_BUILTIN_PADDUSW128,
17444 IX86_BUILTIN_PSUBB128,
17445 IX86_BUILTIN_PSUBW128,
17446 IX86_BUILTIN_PSUBD128,
17447 IX86_BUILTIN_PSUBQ128,
17448 IX86_BUILTIN_PSUBSB128,
17449 IX86_BUILTIN_PSUBSW128,
17450 IX86_BUILTIN_PSUBUSB128,
17451 IX86_BUILTIN_PSUBUSW128,
17453 IX86_BUILTIN_PAND128,
17454 IX86_BUILTIN_PANDN128,
17455 IX86_BUILTIN_POR128,
17456 IX86_BUILTIN_PXOR128,
17458 IX86_BUILTIN_PAVGB128,
17459 IX86_BUILTIN_PAVGW128,
17461 IX86_BUILTIN_PCMPEQB128,
17462 IX86_BUILTIN_PCMPEQW128,
17463 IX86_BUILTIN_PCMPEQD128,
17464 IX86_BUILTIN_PCMPGTB128,
17465 IX86_BUILTIN_PCMPGTW128,
17466 IX86_BUILTIN_PCMPGTD128,
17468 IX86_BUILTIN_PMADDWD128,
17470 IX86_BUILTIN_PMAXSW128,
17471 IX86_BUILTIN_PMAXUB128,
17472 IX86_BUILTIN_PMINSW128,
17473 IX86_BUILTIN_PMINUB128,
17475 IX86_BUILTIN_PMULUDQ,
17476 IX86_BUILTIN_PMULUDQ128,
17477 IX86_BUILTIN_PMULHUW128,
17478 IX86_BUILTIN_PMULHW128,
17479 IX86_BUILTIN_PMULLW128,
17481 IX86_BUILTIN_PSADBW128,
17482 IX86_BUILTIN_PSHUFHW,
17483 IX86_BUILTIN_PSHUFLW,
17484 IX86_BUILTIN_PSHUFD,
17486 IX86_BUILTIN_PSLLDQI128,
17487 IX86_BUILTIN_PSLLWI128,
17488 IX86_BUILTIN_PSLLDI128,
17489 IX86_BUILTIN_PSLLQI128,
17490 IX86_BUILTIN_PSRAWI128,
17491 IX86_BUILTIN_PSRADI128,
17492 IX86_BUILTIN_PSRLDQI128,
17493 IX86_BUILTIN_PSRLWI128,
17494 IX86_BUILTIN_PSRLDI128,
17495 IX86_BUILTIN_PSRLQI128,
17497 IX86_BUILTIN_PSLLDQ128,
17498 IX86_BUILTIN_PSLLW128,
17499 IX86_BUILTIN_PSLLD128,
17500 IX86_BUILTIN_PSLLQ128,
17501 IX86_BUILTIN_PSRAW128,
17502 IX86_BUILTIN_PSRAD128,
17503 IX86_BUILTIN_PSRLW128,
17504 IX86_BUILTIN_PSRLD128,
17505 IX86_BUILTIN_PSRLQ128,
17507 IX86_BUILTIN_PUNPCKHBW128,
17508 IX86_BUILTIN_PUNPCKHWD128,
17509 IX86_BUILTIN_PUNPCKHDQ128,
17510 IX86_BUILTIN_PUNPCKHQDQ128,
17511 IX86_BUILTIN_PUNPCKLBW128,
17512 IX86_BUILTIN_PUNPCKLWD128,
17513 IX86_BUILTIN_PUNPCKLDQ128,
17514 IX86_BUILTIN_PUNPCKLQDQ128,
17516 IX86_BUILTIN_CLFLUSH,
17517 IX86_BUILTIN_MFENCE,
17518 IX86_BUILTIN_LFENCE,
17521 IX86_BUILTIN_ADDSUBPS,
17522 IX86_BUILTIN_HADDPS,
17523 IX86_BUILTIN_HSUBPS,
17524 IX86_BUILTIN_MOVSHDUP,
17525 IX86_BUILTIN_MOVSLDUP,
17526 IX86_BUILTIN_ADDSUBPD,
17527 IX86_BUILTIN_HADDPD,
17528 IX86_BUILTIN_HSUBPD,
17529 IX86_BUILTIN_LDDQU,
17531 IX86_BUILTIN_MONITOR,
17532 IX86_BUILTIN_MWAIT,
17535 IX86_BUILTIN_PHADDW,
17536 IX86_BUILTIN_PHADDD,
17537 IX86_BUILTIN_PHADDSW,
17538 IX86_BUILTIN_PHSUBW,
17539 IX86_BUILTIN_PHSUBD,
17540 IX86_BUILTIN_PHSUBSW,
17541 IX86_BUILTIN_PMADDUBSW,
17542 IX86_BUILTIN_PMULHRSW,
17543 IX86_BUILTIN_PSHUFB,
17544 IX86_BUILTIN_PSIGNB,
17545 IX86_BUILTIN_PSIGNW,
17546 IX86_BUILTIN_PSIGND,
17547 IX86_BUILTIN_PALIGNR,
17548 IX86_BUILTIN_PABSB,
17549 IX86_BUILTIN_PABSW,
17550 IX86_BUILTIN_PABSD,
17552 IX86_BUILTIN_PHADDW128,
17553 IX86_BUILTIN_PHADDD128,
17554 IX86_BUILTIN_PHADDSW128,
17555 IX86_BUILTIN_PHSUBW128,
17556 IX86_BUILTIN_PHSUBD128,
17557 IX86_BUILTIN_PHSUBSW128,
17558 IX86_BUILTIN_PMADDUBSW128,
17559 IX86_BUILTIN_PMULHRSW128,
17560 IX86_BUILTIN_PSHUFB128,
17561 IX86_BUILTIN_PSIGNB128,
17562 IX86_BUILTIN_PSIGNW128,
17563 IX86_BUILTIN_PSIGND128,
17564 IX86_BUILTIN_PALIGNR128,
17565 IX86_BUILTIN_PABSB128,
17566 IX86_BUILTIN_PABSW128,
17567 IX86_BUILTIN_PABSD128,
17569 /* AMDFAM10 - SSE4A New Instructions. */
17570 IX86_BUILTIN_MOVNTSD,
17571 IX86_BUILTIN_MOVNTSS,
17572 IX86_BUILTIN_EXTRQI,
17573 IX86_BUILTIN_EXTRQ,
17574 IX86_BUILTIN_INSERTQI,
17575 IX86_BUILTIN_INSERTQ,
17578 IX86_BUILTIN_BLENDPD,
17579 IX86_BUILTIN_BLENDPS,
17580 IX86_BUILTIN_BLENDVPD,
17581 IX86_BUILTIN_BLENDVPS,
17582 IX86_BUILTIN_PBLENDVB128,
17583 IX86_BUILTIN_PBLENDW128,
17588 IX86_BUILTIN_INSERTPS128,
17590 IX86_BUILTIN_MOVNTDQA,
17591 IX86_BUILTIN_MPSADBW128,
17592 IX86_BUILTIN_PACKUSDW128,
17593 IX86_BUILTIN_PCMPEQQ,
17594 IX86_BUILTIN_PHMINPOSUW128,
17596 IX86_BUILTIN_PMAXSB128,
17597 IX86_BUILTIN_PMAXSD128,
17598 IX86_BUILTIN_PMAXUD128,
17599 IX86_BUILTIN_PMAXUW128,
17601 IX86_BUILTIN_PMINSB128,
17602 IX86_BUILTIN_PMINSD128,
17603 IX86_BUILTIN_PMINUD128,
17604 IX86_BUILTIN_PMINUW128,
17606 IX86_BUILTIN_PMOVSXBW128,
17607 IX86_BUILTIN_PMOVSXBD128,
17608 IX86_BUILTIN_PMOVSXBQ128,
17609 IX86_BUILTIN_PMOVSXWD128,
17610 IX86_BUILTIN_PMOVSXWQ128,
17611 IX86_BUILTIN_PMOVSXDQ128,
17613 IX86_BUILTIN_PMOVZXBW128,
17614 IX86_BUILTIN_PMOVZXBD128,
17615 IX86_BUILTIN_PMOVZXBQ128,
17616 IX86_BUILTIN_PMOVZXWD128,
17617 IX86_BUILTIN_PMOVZXWQ128,
17618 IX86_BUILTIN_PMOVZXDQ128,
17620 IX86_BUILTIN_PMULDQ128,
17621 IX86_BUILTIN_PMULLD128,
17623 IX86_BUILTIN_ROUNDPD,
17624 IX86_BUILTIN_ROUNDPS,
17625 IX86_BUILTIN_ROUNDSD,
17626 IX86_BUILTIN_ROUNDSS,
17628 IX86_BUILTIN_PTESTZ,
17629 IX86_BUILTIN_PTESTC,
17630 IX86_BUILTIN_PTESTNZC,
17632 IX86_BUILTIN_VEC_INIT_V2SI,
17633 IX86_BUILTIN_VEC_INIT_V4HI,
17634 IX86_BUILTIN_VEC_INIT_V8QI,
17635 IX86_BUILTIN_VEC_EXT_V2DF,
17636 IX86_BUILTIN_VEC_EXT_V2DI,
17637 IX86_BUILTIN_VEC_EXT_V4SF,
17638 IX86_BUILTIN_VEC_EXT_V4SI,
17639 IX86_BUILTIN_VEC_EXT_V8HI,
17640 IX86_BUILTIN_VEC_EXT_V2SI,
17641 IX86_BUILTIN_VEC_EXT_V4HI,
17642 IX86_BUILTIN_VEC_EXT_V16QI,
17643 IX86_BUILTIN_VEC_SET_V2DI,
17644 IX86_BUILTIN_VEC_SET_V4SF,
17645 IX86_BUILTIN_VEC_SET_V4SI,
17646 IX86_BUILTIN_VEC_SET_V8HI,
17647 IX86_BUILTIN_VEC_SET_V4HI,
17648 IX86_BUILTIN_VEC_SET_V16QI,
17650 IX86_BUILTIN_VEC_PACK_SFIX,
17653 IX86_BUILTIN_CRC32QI,
17654 IX86_BUILTIN_CRC32HI,
17655 IX86_BUILTIN_CRC32SI,
17656 IX86_BUILTIN_CRC32DI,
17658 IX86_BUILTIN_PCMPESTRI128,
17659 IX86_BUILTIN_PCMPESTRM128,
17660 IX86_BUILTIN_PCMPESTRA128,
17661 IX86_BUILTIN_PCMPESTRC128,
17662 IX86_BUILTIN_PCMPESTRO128,
17663 IX86_BUILTIN_PCMPESTRS128,
17664 IX86_BUILTIN_PCMPESTRZ128,
17665 IX86_BUILTIN_PCMPISTRI128,
17666 IX86_BUILTIN_PCMPISTRM128,
17667 IX86_BUILTIN_PCMPISTRA128,
17668 IX86_BUILTIN_PCMPISTRC128,
17669 IX86_BUILTIN_PCMPISTRO128,
17670 IX86_BUILTIN_PCMPISTRS128,
17671 IX86_BUILTIN_PCMPISTRZ128,
17673 IX86_BUILTIN_PCMPGTQ,
17675 /* AES instructions */
17676 IX86_BUILTIN_AESENC128,
17677 IX86_BUILTIN_AESENCLAST128,
17678 IX86_BUILTIN_AESDEC128,
17679 IX86_BUILTIN_AESDECLAST128,
17680 IX86_BUILTIN_AESIMC128,
17681 IX86_BUILTIN_AESKEYGENASSIST128,
17683 /* PCLMUL instruction */
17684 IX86_BUILTIN_PCLMULQDQ128,
17686 /* TFmode support builtins. */
17688 IX86_BUILTIN_FABSQ,
17689 IX86_BUILTIN_COPYSIGNQ,
17691 /* SSE5 instructions */
17692 IX86_BUILTIN_FMADDSS,
17693 IX86_BUILTIN_FMADDSD,
17694 IX86_BUILTIN_FMADDPS,
17695 IX86_BUILTIN_FMADDPD,
17696 IX86_BUILTIN_FMSUBSS,
17697 IX86_BUILTIN_FMSUBSD,
17698 IX86_BUILTIN_FMSUBPS,
17699 IX86_BUILTIN_FMSUBPD,
17700 IX86_BUILTIN_FNMADDSS,
17701 IX86_BUILTIN_FNMADDSD,
17702 IX86_BUILTIN_FNMADDPS,
17703 IX86_BUILTIN_FNMADDPD,
17704 IX86_BUILTIN_FNMSUBSS,
17705 IX86_BUILTIN_FNMSUBSD,
17706 IX86_BUILTIN_FNMSUBPS,
17707 IX86_BUILTIN_FNMSUBPD,
17708 IX86_BUILTIN_PCMOV_V2DI,
17709 IX86_BUILTIN_PCMOV_V4SI,
17710 IX86_BUILTIN_PCMOV_V8HI,
17711 IX86_BUILTIN_PCMOV_V16QI,
17712 IX86_BUILTIN_PCMOV_V4SF,
17713 IX86_BUILTIN_PCMOV_V2DF,
17714 IX86_BUILTIN_PPERM,
17715 IX86_BUILTIN_PERMPS,
17716 IX86_BUILTIN_PERMPD,
17717 IX86_BUILTIN_PMACSSWW,
17718 IX86_BUILTIN_PMACSWW,
17719 IX86_BUILTIN_PMACSSWD,
17720 IX86_BUILTIN_PMACSWD,
17721 IX86_BUILTIN_PMACSSDD,
17722 IX86_BUILTIN_PMACSDD,
17723 IX86_BUILTIN_PMACSSDQL,
17724 IX86_BUILTIN_PMACSSDQH,
17725 IX86_BUILTIN_PMACSDQL,
17726 IX86_BUILTIN_PMACSDQH,
17727 IX86_BUILTIN_PMADCSSWD,
17728 IX86_BUILTIN_PMADCSWD,
17729 IX86_BUILTIN_PHADDBW,
17730 IX86_BUILTIN_PHADDBD,
17731 IX86_BUILTIN_PHADDBQ,
17732 IX86_BUILTIN_PHADDWD,
17733 IX86_BUILTIN_PHADDWQ,
17734 IX86_BUILTIN_PHADDDQ,
17735 IX86_BUILTIN_PHADDUBW,
17736 IX86_BUILTIN_PHADDUBD,
17737 IX86_BUILTIN_PHADDUBQ,
17738 IX86_BUILTIN_PHADDUWD,
17739 IX86_BUILTIN_PHADDUWQ,
17740 IX86_BUILTIN_PHADDUDQ,
17741 IX86_BUILTIN_PHSUBBW,
17742 IX86_BUILTIN_PHSUBWD,
17743 IX86_BUILTIN_PHSUBDQ,
17744 IX86_BUILTIN_PROTB,
17745 IX86_BUILTIN_PROTW,
17746 IX86_BUILTIN_PROTD,
17747 IX86_BUILTIN_PROTQ,
17748 IX86_BUILTIN_PROTB_IMM,
17749 IX86_BUILTIN_PROTW_IMM,
17750 IX86_BUILTIN_PROTD_IMM,
17751 IX86_BUILTIN_PROTQ_IMM,
17752 IX86_BUILTIN_PSHLB,
17753 IX86_BUILTIN_PSHLW,
17754 IX86_BUILTIN_PSHLD,
17755 IX86_BUILTIN_PSHLQ,
17756 IX86_BUILTIN_PSHAB,
17757 IX86_BUILTIN_PSHAW,
17758 IX86_BUILTIN_PSHAD,
17759 IX86_BUILTIN_PSHAQ,
17760 IX86_BUILTIN_FRCZSS,
17761 IX86_BUILTIN_FRCZSD,
17762 IX86_BUILTIN_FRCZPS,
17763 IX86_BUILTIN_FRCZPD,
17764 IX86_BUILTIN_CVTPH2PS,
17765 IX86_BUILTIN_CVTPS2PH,
17767 IX86_BUILTIN_COMEQSS,
17768 IX86_BUILTIN_COMNESS,
17769 IX86_BUILTIN_COMLTSS,
17770 IX86_BUILTIN_COMLESS,
17771 IX86_BUILTIN_COMGTSS,
17772 IX86_BUILTIN_COMGESS,
17773 IX86_BUILTIN_COMUEQSS,
17774 IX86_BUILTIN_COMUNESS,
17775 IX86_BUILTIN_COMULTSS,
17776 IX86_BUILTIN_COMULESS,
17777 IX86_BUILTIN_COMUGTSS,
17778 IX86_BUILTIN_COMUGESS,
17779 IX86_BUILTIN_COMORDSS,
17780 IX86_BUILTIN_COMUNORDSS,
17781 IX86_BUILTIN_COMFALSESS,
17782 IX86_BUILTIN_COMTRUESS,
17784 IX86_BUILTIN_COMEQSD,
17785 IX86_BUILTIN_COMNESD,
17786 IX86_BUILTIN_COMLTSD,
17787 IX86_BUILTIN_COMLESD,
17788 IX86_BUILTIN_COMGTSD,
17789 IX86_BUILTIN_COMGESD,
17790 IX86_BUILTIN_COMUEQSD,
17791 IX86_BUILTIN_COMUNESD,
17792 IX86_BUILTIN_COMULTSD,
17793 IX86_BUILTIN_COMULESD,
17794 IX86_BUILTIN_COMUGTSD,
17795 IX86_BUILTIN_COMUGESD,
17796 IX86_BUILTIN_COMORDSD,
17797 IX86_BUILTIN_COMUNORDSD,
17798 IX86_BUILTIN_COMFALSESD,
17799 IX86_BUILTIN_COMTRUESD,
17801 IX86_BUILTIN_COMEQPS,
17802 IX86_BUILTIN_COMNEPS,
17803 IX86_BUILTIN_COMLTPS,
17804 IX86_BUILTIN_COMLEPS,
17805 IX86_BUILTIN_COMGTPS,
17806 IX86_BUILTIN_COMGEPS,
17807 IX86_BUILTIN_COMUEQPS,
17808 IX86_BUILTIN_COMUNEPS,
17809 IX86_BUILTIN_COMULTPS,
17810 IX86_BUILTIN_COMULEPS,
17811 IX86_BUILTIN_COMUGTPS,
17812 IX86_BUILTIN_COMUGEPS,
17813 IX86_BUILTIN_COMORDPS,
17814 IX86_BUILTIN_COMUNORDPS,
17815 IX86_BUILTIN_COMFALSEPS,
17816 IX86_BUILTIN_COMTRUEPS,
17818 IX86_BUILTIN_COMEQPD,
17819 IX86_BUILTIN_COMNEPD,
17820 IX86_BUILTIN_COMLTPD,
17821 IX86_BUILTIN_COMLEPD,
17822 IX86_BUILTIN_COMGTPD,
17823 IX86_BUILTIN_COMGEPD,
17824 IX86_BUILTIN_COMUEQPD,
17825 IX86_BUILTIN_COMUNEPD,
17826 IX86_BUILTIN_COMULTPD,
17827 IX86_BUILTIN_COMULEPD,
17828 IX86_BUILTIN_COMUGTPD,
17829 IX86_BUILTIN_COMUGEPD,
17830 IX86_BUILTIN_COMORDPD,
17831 IX86_BUILTIN_COMUNORDPD,
17832 IX86_BUILTIN_COMFALSEPD,
17833 IX86_BUILTIN_COMTRUEPD,
17835 IX86_BUILTIN_PCOMEQUB,
17836 IX86_BUILTIN_PCOMNEUB,
17837 IX86_BUILTIN_PCOMLTUB,
17838 IX86_BUILTIN_PCOMLEUB,
17839 IX86_BUILTIN_PCOMGTUB,
17840 IX86_BUILTIN_PCOMGEUB,
17841 IX86_BUILTIN_PCOMFALSEUB,
17842 IX86_BUILTIN_PCOMTRUEUB,
17843 IX86_BUILTIN_PCOMEQUW,
17844 IX86_BUILTIN_PCOMNEUW,
17845 IX86_BUILTIN_PCOMLTUW,
17846 IX86_BUILTIN_PCOMLEUW,
17847 IX86_BUILTIN_PCOMGTUW,
17848 IX86_BUILTIN_PCOMGEUW,
17849 IX86_BUILTIN_PCOMFALSEUW,
17850 IX86_BUILTIN_PCOMTRUEUW,
17851 IX86_BUILTIN_PCOMEQUD,
17852 IX86_BUILTIN_PCOMNEUD,
17853 IX86_BUILTIN_PCOMLTUD,
17854 IX86_BUILTIN_PCOMLEUD,
17855 IX86_BUILTIN_PCOMGTUD,
17856 IX86_BUILTIN_PCOMGEUD,
17857 IX86_BUILTIN_PCOMFALSEUD,
17858 IX86_BUILTIN_PCOMTRUEUD,
17859 IX86_BUILTIN_PCOMEQUQ,
17860 IX86_BUILTIN_PCOMNEUQ,
17861 IX86_BUILTIN_PCOMLTUQ,
17862 IX86_BUILTIN_PCOMLEUQ,
17863 IX86_BUILTIN_PCOMGTUQ,
17864 IX86_BUILTIN_PCOMGEUQ,
17865 IX86_BUILTIN_PCOMFALSEUQ,
17866 IX86_BUILTIN_PCOMTRUEUQ,
17868 IX86_BUILTIN_PCOMEQB,
17869 IX86_BUILTIN_PCOMNEB,
17870 IX86_BUILTIN_PCOMLTB,
17871 IX86_BUILTIN_PCOMLEB,
17872 IX86_BUILTIN_PCOMGTB,
17873 IX86_BUILTIN_PCOMGEB,
17874 IX86_BUILTIN_PCOMFALSEB,
17875 IX86_BUILTIN_PCOMTRUEB,
17876 IX86_BUILTIN_PCOMEQW,
17877 IX86_BUILTIN_PCOMNEW,
17878 IX86_BUILTIN_PCOMLTW,
17879 IX86_BUILTIN_PCOMLEW,
17880 IX86_BUILTIN_PCOMGTW,
17881 IX86_BUILTIN_PCOMGEW,
17882 IX86_BUILTIN_PCOMFALSEW,
17883 IX86_BUILTIN_PCOMTRUEW,
17884 IX86_BUILTIN_PCOMEQD,
17885 IX86_BUILTIN_PCOMNED,
17886 IX86_BUILTIN_PCOMLTD,
17887 IX86_BUILTIN_PCOMLED,
17888 IX86_BUILTIN_PCOMGTD,
17889 IX86_BUILTIN_PCOMGED,
17890 IX86_BUILTIN_PCOMFALSED,
17891 IX86_BUILTIN_PCOMTRUED,
17892 IX86_BUILTIN_PCOMEQQ,
17893 IX86_BUILTIN_PCOMNEQ,
17894 IX86_BUILTIN_PCOMLTQ,
17895 IX86_BUILTIN_PCOMLEQ,
17896 IX86_BUILTIN_PCOMGTQ,
17897 IX86_BUILTIN_PCOMGEQ,
17898 IX86_BUILTIN_PCOMFALSEQ,
17899 IX86_BUILTIN_PCOMTRUEQ,
17904 /* Table for the ix86 builtin decls. */
17905 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17907 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17908 * if the target_flags include one of MASK. Stores the function decl
17909 * in the ix86_builtins array.
17910 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17913 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17915 tree decl = NULL_TREE;
17917 if (mask & ix86_isa_flags
17918 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17920 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17922 ix86_builtins[(int) code] = decl;
17928 /* Like def_builtin, but also marks the function decl "const". */
17931 def_builtin_const (int mask, const char *name, tree type,
17932 enum ix86_builtins code)
17934 tree decl = def_builtin (mask, name, type, code);
17936 TREE_READONLY (decl) = 1;
17940 /* Bits for builtin_description.flag. */
17942 /* Set when we don't support the comparison natively, and should
17943 swap_comparison in order to support it. */
17944 #define BUILTIN_DESC_SWAP_OPERANDS 1
17946 struct builtin_description
17948 const unsigned int mask;
17949 const enum insn_code icode;
17950 const char *const name;
17951 const enum ix86_builtins code;
17952 const enum rtx_code comparison;
17956 static const struct builtin_description bdesc_comi[] =
17958 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17959 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17960 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17961 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17962 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17963 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17964 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17965 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17966 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17967 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17968 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17969 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17984 static const struct builtin_description bdesc_pcmpestr[] =
17987 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17988 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17989 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17990 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17991 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17992 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17993 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17996 static const struct builtin_description bdesc_pcmpistr[] =
17999 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18001 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18002 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18003 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18004 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18005 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18008 /* Special builtin types */
18009 enum ix86_special_builtin_type
18011 SPECIAL_FTYPE_UNKNOWN,
18013 V16QI_FTYPE_PCCHAR,
18014 V4SF_FTYPE_PCFLOAT,
18015 V2DF_FTYPE_PCDOUBLE,
18016 V4SF_FTYPE_V4SF_PCV2SF,
18017 V2DF_FTYPE_V2DF_PCDOUBLE,
18019 VOID_FTYPE_PV2SF_V4SF,
18020 VOID_FTYPE_PV2DI_V2DI,
18021 VOID_FTYPE_PCHAR_V16QI,
18022 VOID_FTYPE_PFLOAT_V4SF,
18023 VOID_FTYPE_PDOUBLE_V2DF,
18025 VOID_FTYPE_PINT_INT
18028 /* Builtin types */
18029 enum ix86_builtin_type
18032 FLOAT128_FTYPE_FLOAT128,
18034 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18035 INT_FTYPE_V2DI_V2DI_PTEST,
18053 V4SF_FTYPE_V4SF_VEC_MERGE,
18061 V2DF_FTYPE_V2DF_VEC_MERGE,
18071 V16QI_FTYPE_V16QI_V16QI,
18072 V16QI_FTYPE_V8HI_V8HI,
18073 V8QI_FTYPE_V8QI_V8QI,
18074 V8QI_FTYPE_V4HI_V4HI,
18075 V8HI_FTYPE_V8HI_V8HI,
18076 V8HI_FTYPE_V8HI_V8HI_COUNT,
18077 V8HI_FTYPE_V16QI_V16QI,
18078 V8HI_FTYPE_V4SI_V4SI,
18079 V8HI_FTYPE_V8HI_SI_COUNT,
18080 V4SI_FTYPE_V4SI_V4SI,
18081 V4SI_FTYPE_V4SI_V4SI_COUNT,
18082 V4SI_FTYPE_V8HI_V8HI,
18083 V4SI_FTYPE_V4SF_V4SF,
18084 V4SI_FTYPE_V2DF_V2DF,
18085 V4SI_FTYPE_V4SI_SI_COUNT,
18086 V4HI_FTYPE_V4HI_V4HI,
18087 V4HI_FTYPE_V4HI_V4HI_COUNT,
18088 V4HI_FTYPE_V8QI_V8QI,
18089 V4HI_FTYPE_V2SI_V2SI,
18090 V4HI_FTYPE_V4HI_SI_COUNT,
18091 V4SF_FTYPE_V4SF_V4SF,
18092 V4SF_FTYPE_V4SF_V4SF_SWAP,
18093 V4SF_FTYPE_V4SF_V2SI,
18094 V4SF_FTYPE_V4SF_V2DF,
18095 V4SF_FTYPE_V4SF_DI,
18096 V4SF_FTYPE_V4SF_SI,
18097 V2DI_FTYPE_V2DI_V2DI,
18098 V2DI_FTYPE_V2DI_V2DI_COUNT,
18099 V2DI_FTYPE_V16QI_V16QI,
18100 V2DI_FTYPE_V4SI_V4SI,
18101 V2DI_FTYPE_V2DI_V16QI,
18102 V2DI_FTYPE_V2DF_V2DF,
18103 V2DI_FTYPE_V2DI_SI_COUNT,
18104 V2SI_FTYPE_V2SI_V2SI,
18105 V2SI_FTYPE_V2SI_V2SI_COUNT,
18106 V2SI_FTYPE_V4HI_V4HI,
18107 V2SI_FTYPE_V2SF_V2SF,
18108 V2SI_FTYPE_V2SI_SI_COUNT,
18109 V2DF_FTYPE_V2DF_V2DF,
18110 V2DF_FTYPE_V2DF_V2DF_SWAP,
18111 V2DF_FTYPE_V2DF_V4SF,
18112 V2DF_FTYPE_V2DF_DI,
18113 V2DF_FTYPE_V2DF_SI,
18114 V2SF_FTYPE_V2SF_V2SF,
18115 V1DI_FTYPE_V1DI_V1DI,
18116 V1DI_FTYPE_V1DI_V1DI_COUNT,
18117 V1DI_FTYPE_V8QI_V8QI,
18118 V1DI_FTYPE_V2SI_V2SI,
18119 V1DI_FTYPE_V1DI_SI_COUNT,
18120 UINT64_FTYPE_UINT64_UINT64,
18121 UINT_FTYPE_UINT_UINT,
18122 UINT_FTYPE_UINT_USHORT,
18123 UINT_FTYPE_UINT_UCHAR,
18124 V8HI_FTYPE_V8HI_INT,
18125 V4SI_FTYPE_V4SI_INT,
18126 V4HI_FTYPE_V4HI_INT,
18127 V4SF_FTYPE_V4SF_INT,
18128 V2DI_FTYPE_V2DI_INT,
18129 V2DI2TI_FTYPE_V2DI_INT,
18130 V2DF_FTYPE_V2DF_INT,
18131 V16QI_FTYPE_V16QI_V16QI_V16QI,
18132 V4SF_FTYPE_V4SF_V4SF_V4SF,
18133 V2DF_FTYPE_V2DF_V2DF_V2DF,
18134 V16QI_FTYPE_V16QI_V16QI_INT,
18135 V8HI_FTYPE_V8HI_V8HI_INT,
18136 V4SI_FTYPE_V4SI_V4SI_INT,
18137 V4SF_FTYPE_V4SF_V4SF_INT,
18138 V2DI_FTYPE_V2DI_V2DI_INT,
18139 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18140 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18141 V2DF_FTYPE_V2DF_V2DF_INT,
18142 V2DI_FTYPE_V2DI_UINT_UINT,
18143 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18146 /* Special builtins with variable number of arguments. */
18147 static const struct builtin_description bdesc_special_args[] =
18150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18153 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18157 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18158 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18160 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18165 /* SSE or 3DNow!A */
18166 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18167 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18172 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18173 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18181 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18184 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18187 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18190 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18191 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18194 /* Builtins with variable number of arguments. */
18195 static const struct builtin_description bdesc_args[] =
18198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18284 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18285 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18286 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18287 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18288 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18289 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18294 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18296 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18300 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18307 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18308 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18309 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18339 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18340 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18344 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18346 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18347 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18352 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18359 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18365 /* SSE MMX or 3Dnow!A */
18366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18372 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18376 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18378 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18399 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18400 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18406 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18407 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18408 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18409 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18437 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18441 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18443 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18444 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18450 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18452 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18453 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18454 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18455 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18456 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18457 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18458 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18459 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18470 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18471 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18473 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18475 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18476 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18488 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18489 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18490 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
18506 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
18509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
18510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
18514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
18515 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
18516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
18517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
18519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18520 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18521 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18522 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18523 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18524 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18525 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
18528 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18529 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18530 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
18531 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18532 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18533 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
18535 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
18536 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
18537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
18538 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
18540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
18541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18547 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18548 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18554 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18555 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18556 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18557 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18558 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18559 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18562 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18603 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18637 /* SSE4.1 and SSE5 */
18638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18641 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18644 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18645 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18648 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18649 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18650 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18651 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18652 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18655 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18656 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18657 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18658 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18661 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18665 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18666 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18670 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18673 { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18674 { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18678 enum multi_arg_type {
18688 MULTI_ARG_3_PERMPS,
18689 MULTI_ARG_3_PERMPD,
18696 MULTI_ARG_2_DI_IMM,
18697 MULTI_ARG_2_SI_IMM,
18698 MULTI_ARG_2_HI_IMM,
18699 MULTI_ARG_2_QI_IMM,
18700 MULTI_ARG_2_SF_CMP,
18701 MULTI_ARG_2_DF_CMP,
18702 MULTI_ARG_2_DI_CMP,
18703 MULTI_ARG_2_SI_CMP,
18704 MULTI_ARG_2_HI_CMP,
18705 MULTI_ARG_2_QI_CMP,
18728 static const struct builtin_description bdesc_multi_arg[] =
18730 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18731 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18732 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18733 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18734 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18735 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18736 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18737 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18738 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18739 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18740 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18741 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18742 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18743 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18744 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18745 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18746 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18747 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18748 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18749 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18750 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18751 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18752 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18753 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18754 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18755 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18756 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18757 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18758 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18759 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18760 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18761 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18762 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18763 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18764 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18765 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18766 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18767 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18768 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18769 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18770 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18771 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18772 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18774 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18778 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18966 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18967 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18970 ix86_init_mmx_sse_builtins (void)
18972 const struct builtin_description * d;
18975 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18976 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18977 tree V1DI_type_node
18978 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
18979 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18980 tree V2DI_type_node
18981 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18982 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18983 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18984 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18985 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18986 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18987 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18989 tree pchar_type_node = build_pointer_type (char_type_node);
18990 tree pcchar_type_node
18991 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
18992 tree pfloat_type_node = build_pointer_type (float_type_node);
18993 tree pcfloat_type_node
18994 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
18995 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
18996 tree pcv2sf_type_node
18997 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
18998 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18999 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19002 tree int_ftype_v4sf_v4sf
19003 = build_function_type_list (integer_type_node,
19004 V4SF_type_node, V4SF_type_node, NULL_TREE);
19005 tree v4si_ftype_v4sf_v4sf
19006 = build_function_type_list (V4SI_type_node,
19007 V4SF_type_node, V4SF_type_node, NULL_TREE);
19008 /* MMX/SSE/integer conversions. */
19009 tree int_ftype_v4sf
19010 = build_function_type_list (integer_type_node,
19011 V4SF_type_node, NULL_TREE);
19012 tree int64_ftype_v4sf
19013 = build_function_type_list (long_long_integer_type_node,
19014 V4SF_type_node, NULL_TREE);
19015 tree int_ftype_v8qi
19016 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19017 tree v4sf_ftype_v4sf_int
19018 = build_function_type_list (V4SF_type_node,
19019 V4SF_type_node, integer_type_node, NULL_TREE);
19020 tree v4sf_ftype_v4sf_int64
19021 = build_function_type_list (V4SF_type_node,
19022 V4SF_type_node, long_long_integer_type_node,
19024 tree v4sf_ftype_v4sf_v2si
19025 = build_function_type_list (V4SF_type_node,
19026 V4SF_type_node, V2SI_type_node, NULL_TREE);
19028 /* Miscellaneous. */
19029 tree v8qi_ftype_v4hi_v4hi
19030 = build_function_type_list (V8QI_type_node,
19031 V4HI_type_node, V4HI_type_node, NULL_TREE);
19032 tree v4hi_ftype_v2si_v2si
19033 = build_function_type_list (V4HI_type_node,
19034 V2SI_type_node, V2SI_type_node, NULL_TREE);
19035 tree v4sf_ftype_v4sf_v4sf_int
19036 = build_function_type_list (V4SF_type_node,
19037 V4SF_type_node, V4SF_type_node,
19038 integer_type_node, NULL_TREE);
19039 tree v2si_ftype_v4hi_v4hi
19040 = build_function_type_list (V2SI_type_node,
19041 V4HI_type_node, V4HI_type_node, NULL_TREE);
19042 tree v4hi_ftype_v4hi_int
19043 = build_function_type_list (V4HI_type_node,
19044 V4HI_type_node, integer_type_node, NULL_TREE);
19045 tree v2si_ftype_v2si_int
19046 = build_function_type_list (V2SI_type_node,
19047 V2SI_type_node, integer_type_node, NULL_TREE);
19048 tree v1di_ftype_v1di_int
19049 = build_function_type_list (V1DI_type_node,
19050 V1DI_type_node, integer_type_node, NULL_TREE);
19052 tree void_ftype_void
19053 = build_function_type (void_type_node, void_list_node);
19054 tree void_ftype_unsigned
19055 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19056 tree void_ftype_unsigned_unsigned
19057 = build_function_type_list (void_type_node, unsigned_type_node,
19058 unsigned_type_node, NULL_TREE);
19059 tree void_ftype_pcvoid_unsigned_unsigned
19060 = build_function_type_list (void_type_node, const_ptr_type_node,
19061 unsigned_type_node, unsigned_type_node,
19063 tree unsigned_ftype_void
19064 = build_function_type (unsigned_type_node, void_list_node);
19065 tree v2si_ftype_v4sf
19066 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19067 /* Loads/stores. */
19068 tree void_ftype_v8qi_v8qi_pchar
19069 = build_function_type_list (void_type_node,
19070 V8QI_type_node, V8QI_type_node,
19071 pchar_type_node, NULL_TREE);
19072 tree v4sf_ftype_pcfloat
19073 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19074 tree v4sf_ftype_v4sf_pcv2sf
19075 = build_function_type_list (V4SF_type_node,
19076 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19077 tree void_ftype_pv2sf_v4sf
19078 = build_function_type_list (void_type_node,
19079 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19080 tree void_ftype_pfloat_v4sf
19081 = build_function_type_list (void_type_node,
19082 pfloat_type_node, V4SF_type_node, NULL_TREE);
19083 tree void_ftype_pdi_di
19084 = build_function_type_list (void_type_node,
19085 pdi_type_node, long_long_unsigned_type_node,
19087 tree void_ftype_pv2di_v2di
19088 = build_function_type_list (void_type_node,
19089 pv2di_type_node, V2DI_type_node, NULL_TREE);
19090 /* Normal vector unops. */
19091 tree v4sf_ftype_v4sf
19092 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19093 tree v16qi_ftype_v16qi
19094 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19095 tree v8hi_ftype_v8hi
19096 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19097 tree v4si_ftype_v4si
19098 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19099 tree v8qi_ftype_v8qi
19100 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19101 tree v4hi_ftype_v4hi
19102 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19104 /* Normal vector binops. */
19105 tree v4sf_ftype_v4sf_v4sf
19106 = build_function_type_list (V4SF_type_node,
19107 V4SF_type_node, V4SF_type_node, NULL_TREE);
19108 tree v8qi_ftype_v8qi_v8qi
19109 = build_function_type_list (V8QI_type_node,
19110 V8QI_type_node, V8QI_type_node, NULL_TREE);
19111 tree v4hi_ftype_v4hi_v4hi
19112 = build_function_type_list (V4HI_type_node,
19113 V4HI_type_node, V4HI_type_node, NULL_TREE);
19114 tree v2si_ftype_v2si_v2si
19115 = build_function_type_list (V2SI_type_node,
19116 V2SI_type_node, V2SI_type_node, NULL_TREE);
19117 tree v1di_ftype_v1di_v1di
19118 = build_function_type_list (V1DI_type_node,
19119 V1DI_type_node, V1DI_type_node, NULL_TREE);
19120 tree v1di_ftype_v1di_v1di_int
19121 = build_function_type_list (V1DI_type_node,
19122 V1DI_type_node, V1DI_type_node,
19123 integer_type_node, NULL_TREE);
19124 tree v2si_ftype_v2sf
19125 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19126 tree v2sf_ftype_v2si
19127 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19128 tree v2si_ftype_v2si
19129 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19130 tree v2sf_ftype_v2sf
19131 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19132 tree v2sf_ftype_v2sf_v2sf
19133 = build_function_type_list (V2SF_type_node,
19134 V2SF_type_node, V2SF_type_node, NULL_TREE);
19135 tree v2si_ftype_v2sf_v2sf
19136 = build_function_type_list (V2SI_type_node,
19137 V2SF_type_node, V2SF_type_node, NULL_TREE);
19138 tree pint_type_node = build_pointer_type (integer_type_node);
19139 tree pdouble_type_node = build_pointer_type (double_type_node);
19140 tree pcdouble_type_node = build_pointer_type (
19141 build_type_variant (double_type_node, 1, 0));
19142 tree int_ftype_v2df_v2df
19143 = build_function_type_list (integer_type_node,
19144 V2DF_type_node, V2DF_type_node, NULL_TREE);
19146 tree void_ftype_pcvoid
19147 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19148 tree v4sf_ftype_v4si
19149 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19150 tree v4si_ftype_v4sf
19151 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19152 tree v2df_ftype_v4si
19153 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19154 tree v4si_ftype_v2df
19155 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19156 tree v4si_ftype_v2df_v2df
19157 = build_function_type_list (V4SI_type_node,
19158 V2DF_type_node, V2DF_type_node, NULL_TREE);
19159 tree v2si_ftype_v2df
19160 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19161 tree v4sf_ftype_v2df
19162 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19163 tree v2df_ftype_v2si
19164 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19165 tree v2df_ftype_v4sf
19166 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19167 tree int_ftype_v2df
19168 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19169 tree int64_ftype_v2df
19170 = build_function_type_list (long_long_integer_type_node,
19171 V2DF_type_node, NULL_TREE);
19172 tree v2df_ftype_v2df_int
19173 = build_function_type_list (V2DF_type_node,
19174 V2DF_type_node, integer_type_node, NULL_TREE);
19175 tree v2df_ftype_v2df_int64
19176 = build_function_type_list (V2DF_type_node,
19177 V2DF_type_node, long_long_integer_type_node,
19179 tree v4sf_ftype_v4sf_v2df
19180 = build_function_type_list (V4SF_type_node,
19181 V4SF_type_node, V2DF_type_node, NULL_TREE);
19182 tree v2df_ftype_v2df_v4sf
19183 = build_function_type_list (V2DF_type_node,
19184 V2DF_type_node, V4SF_type_node, NULL_TREE);
19185 tree v2df_ftype_v2df_v2df_int
19186 = build_function_type_list (V2DF_type_node,
19187 V2DF_type_node, V2DF_type_node,
19190 tree v2df_ftype_v2df_pcdouble
19191 = build_function_type_list (V2DF_type_node,
19192 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19193 tree void_ftype_pdouble_v2df
19194 = build_function_type_list (void_type_node,
19195 pdouble_type_node, V2DF_type_node, NULL_TREE);
19196 tree void_ftype_pint_int
19197 = build_function_type_list (void_type_node,
19198 pint_type_node, integer_type_node, NULL_TREE);
19199 tree void_ftype_v16qi_v16qi_pchar
19200 = build_function_type_list (void_type_node,
19201 V16QI_type_node, V16QI_type_node,
19202 pchar_type_node, NULL_TREE);
19203 tree v2df_ftype_pcdouble
19204 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19205 tree v2df_ftype_v2df_v2df
19206 = build_function_type_list (V2DF_type_node,
19207 V2DF_type_node, V2DF_type_node, NULL_TREE);
19208 tree v16qi_ftype_v16qi_v16qi
19209 = build_function_type_list (V16QI_type_node,
19210 V16QI_type_node, V16QI_type_node, NULL_TREE);
19211 tree v8hi_ftype_v8hi_v8hi
19212 = build_function_type_list (V8HI_type_node,
19213 V8HI_type_node, V8HI_type_node, NULL_TREE);
19214 tree v4si_ftype_v4si_v4si
19215 = build_function_type_list (V4SI_type_node,
19216 V4SI_type_node, V4SI_type_node, NULL_TREE);
19217 tree v2di_ftype_v2di_v2di
19218 = build_function_type_list (V2DI_type_node,
19219 V2DI_type_node, V2DI_type_node, NULL_TREE);
19220 tree v2di_ftype_v2df_v2df
19221 = build_function_type_list (V2DI_type_node,
19222 V2DF_type_node, V2DF_type_node, NULL_TREE);
19223 tree v2df_ftype_v2df
19224 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19225 tree v2di_ftype_v2di_int
19226 = build_function_type_list (V2DI_type_node,
19227 V2DI_type_node, integer_type_node, NULL_TREE);
19228 tree v2di_ftype_v2di_v2di_int
19229 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19230 V2DI_type_node, integer_type_node, NULL_TREE);
19231 tree v4si_ftype_v4si_int
19232 = build_function_type_list (V4SI_type_node,
19233 V4SI_type_node, integer_type_node, NULL_TREE);
19234 tree v8hi_ftype_v8hi_int
19235 = build_function_type_list (V8HI_type_node,
19236 V8HI_type_node, integer_type_node, NULL_TREE);
19237 tree v4si_ftype_v8hi_v8hi
19238 = build_function_type_list (V4SI_type_node,
19239 V8HI_type_node, V8HI_type_node, NULL_TREE);
19240 tree v1di_ftype_v8qi_v8qi
19241 = build_function_type_list (V1DI_type_node,
19242 V8QI_type_node, V8QI_type_node, NULL_TREE);
19243 tree v1di_ftype_v2si_v2si
19244 = build_function_type_list (V1DI_type_node,
19245 V2SI_type_node, V2SI_type_node, NULL_TREE);
19246 tree v2di_ftype_v16qi_v16qi
19247 = build_function_type_list (V2DI_type_node,
19248 V16QI_type_node, V16QI_type_node, NULL_TREE);
19249 tree v2di_ftype_v4si_v4si
19250 = build_function_type_list (V2DI_type_node,
19251 V4SI_type_node, V4SI_type_node, NULL_TREE);
19252 tree int_ftype_v16qi
19253 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19254 tree v16qi_ftype_pcchar
19255 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19256 tree void_ftype_pchar_v16qi
19257 = build_function_type_list (void_type_node,
19258 pchar_type_node, V16QI_type_node, NULL_TREE);
19260 tree v2di_ftype_v2di_unsigned_unsigned
19261 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19262 unsigned_type_node, unsigned_type_node,
19264 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19265 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19266 unsigned_type_node, unsigned_type_node,
19268 tree v2di_ftype_v2di_v16qi
19269 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19271 tree v2df_ftype_v2df_v2df_v2df
19272 = build_function_type_list (V2DF_type_node,
19273 V2DF_type_node, V2DF_type_node,
19274 V2DF_type_node, NULL_TREE);
19275 tree v4sf_ftype_v4sf_v4sf_v4sf
19276 = build_function_type_list (V4SF_type_node,
19277 V4SF_type_node, V4SF_type_node,
19278 V4SF_type_node, NULL_TREE);
19279 tree v8hi_ftype_v16qi
19280 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19282 tree v4si_ftype_v16qi
19283 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19285 tree v2di_ftype_v16qi
19286 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19288 tree v4si_ftype_v8hi
19289 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19291 tree v2di_ftype_v8hi
19292 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19294 tree v2di_ftype_v4si
19295 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19297 tree v2di_ftype_pv2di
19298 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19300 tree v16qi_ftype_v16qi_v16qi_int
19301 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19302 V16QI_type_node, integer_type_node,
19304 tree v16qi_ftype_v16qi_v16qi_v16qi
19305 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19306 V16QI_type_node, V16QI_type_node,
19308 tree v8hi_ftype_v8hi_v8hi_int
19309 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19310 V8HI_type_node, integer_type_node,
19312 tree v4si_ftype_v4si_v4si_int
19313 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19314 V4SI_type_node, integer_type_node,
19316 tree int_ftype_v2di_v2di
19317 = build_function_type_list (integer_type_node,
19318 V2DI_type_node, V2DI_type_node,
19320 tree int_ftype_v16qi_int_v16qi_int_int
19321 = build_function_type_list (integer_type_node,
19328 tree v16qi_ftype_v16qi_int_v16qi_int_int
19329 = build_function_type_list (V16QI_type_node,
19336 tree int_ftype_v16qi_v16qi_int
19337 = build_function_type_list (integer_type_node,
19343 /* SSE5 instructions */
19344 tree v2di_ftype_v2di_v2di_v2di
19345 = build_function_type_list (V2DI_type_node,
19351 tree v4si_ftype_v4si_v4si_v4si
19352 = build_function_type_list (V4SI_type_node,
19358 tree v4si_ftype_v4si_v4si_v2di
19359 = build_function_type_list (V4SI_type_node,
19365 tree v8hi_ftype_v8hi_v8hi_v8hi
19366 = build_function_type_list (V8HI_type_node,
19372 tree v8hi_ftype_v8hi_v8hi_v4si
19373 = build_function_type_list (V8HI_type_node,
19379 tree v2df_ftype_v2df_v2df_v16qi
19380 = build_function_type_list (V2DF_type_node,
19386 tree v4sf_ftype_v4sf_v4sf_v16qi
19387 = build_function_type_list (V4SF_type_node,
19393 tree v2di_ftype_v2di_si
19394 = build_function_type_list (V2DI_type_node,
19399 tree v4si_ftype_v4si_si
19400 = build_function_type_list (V4SI_type_node,
19405 tree v8hi_ftype_v8hi_si
19406 = build_function_type_list (V8HI_type_node,
19411 tree v16qi_ftype_v16qi_si
19412 = build_function_type_list (V16QI_type_node,
19416 tree v4sf_ftype_v4hi
19417 = build_function_type_list (V4SF_type_node,
19421 tree v4hi_ftype_v4sf
19422 = build_function_type_list (V4HI_type_node,
19426 tree v2di_ftype_v2di
19427 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19429 tree v16qi_ftype_v8hi_v8hi
19430 = build_function_type_list (V16QI_type_node,
19431 V8HI_type_node, V8HI_type_node,
19433 tree v8hi_ftype_v4si_v4si
19434 = build_function_type_list (V8HI_type_node,
19435 V4SI_type_node, V4SI_type_node,
19437 tree v8hi_ftype_v16qi_v16qi
19438 = build_function_type_list (V8HI_type_node,
19439 V16QI_type_node, V16QI_type_node,
19441 tree v4hi_ftype_v8qi_v8qi
19442 = build_function_type_list (V4HI_type_node,
19443 V8QI_type_node, V8QI_type_node,
19445 tree unsigned_ftype_unsigned_uchar
19446 = build_function_type_list (unsigned_type_node,
19447 unsigned_type_node,
19448 unsigned_char_type_node,
19450 tree unsigned_ftype_unsigned_ushort
19451 = build_function_type_list (unsigned_type_node,
19452 unsigned_type_node,
19453 short_unsigned_type_node,
19455 tree unsigned_ftype_unsigned_unsigned
19456 = build_function_type_list (unsigned_type_node,
19457 unsigned_type_node,
19458 unsigned_type_node,
19460 tree uint64_ftype_uint64_uint64
19461 = build_function_type_list (long_long_unsigned_type_node,
19462 long_long_unsigned_type_node,
19463 long_long_unsigned_type_node,
19465 tree float_ftype_float
19466 = build_function_type_list (float_type_node,
19472 /* The __float80 type. */
19473 if (TYPE_MODE (long_double_type_node) == XFmode)
19474 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19478 /* The __float80 type. */
19479 tree float80_type_node = make_node (REAL_TYPE);
19481 TYPE_PRECISION (float80_type_node) = 80;
19482 layout_type (float80_type_node);
19483 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19489 tree float128_type_node = make_node (REAL_TYPE);
19491 TYPE_PRECISION (float128_type_node) = 128;
19492 layout_type (float128_type_node);
19493 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19496 /* TFmode support builtins. */
19497 ftype = build_function_type (float128_type_node,
19499 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19501 ftype = build_function_type_list (float128_type_node,
19502 float128_type_node,
19504 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19506 ftype = build_function_type_list (float128_type_node,
19507 float128_type_node,
19508 float128_type_node,
19510 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19513 /* Add all special builtins with variable number of operands. */
19514 for (i = 0, d = bdesc_special_args;
19515 i < ARRAY_SIZE (bdesc_special_args);
19523 switch ((enum ix86_special_builtin_type) d->flag)
19525 case VOID_FTYPE_VOID:
19526 type = void_ftype_void;
19528 case V16QI_FTYPE_PCCHAR:
19529 type = v16qi_ftype_pcchar;
19531 case V4SF_FTYPE_PCFLOAT:
19532 type = v4sf_ftype_pcfloat;
19534 case V2DI_FTYPE_PV2DI:
19535 type = v2di_ftype_pv2di;
19537 case V2DF_FTYPE_PCDOUBLE:
19538 type = v2df_ftype_pcdouble;
19540 case V4SF_FTYPE_V4SF_PCV2SF:
19541 type = v4sf_ftype_v4sf_pcv2sf;
19543 case V2DF_FTYPE_V2DF_PCDOUBLE:
19544 type = v2df_ftype_v2df_pcdouble;
19546 case VOID_FTYPE_PV2SF_V4SF:
19547 type = void_ftype_pv2sf_v4sf;
19549 case VOID_FTYPE_PV2DI_V2DI:
19550 type = void_ftype_pv2di_v2di;
19552 case VOID_FTYPE_PCHAR_V16QI:
19553 type = void_ftype_pchar_v16qi;
19555 case VOID_FTYPE_PFLOAT_V4SF:
19556 type = void_ftype_pfloat_v4sf;
19558 case VOID_FTYPE_PDOUBLE_V2DF:
19559 type = void_ftype_pdouble_v2df;
19561 case VOID_FTYPE_PDI_DI:
19562 type = void_ftype_pdi_di;
19564 case VOID_FTYPE_PINT_INT:
19565 type = void_ftype_pint_int;
19568 gcc_unreachable ();
19571 def_builtin (d->mask, d->name, type, d->code);
19574 /* Add all builtins with variable number of operands. */
19575 for (i = 0, d = bdesc_args;
19576 i < ARRAY_SIZE (bdesc_args);
19584 switch ((enum ix86_builtin_type) d->flag)
19586 case FLOAT_FTYPE_FLOAT:
19587 type = float_ftype_float;
19589 case INT_FTYPE_V2DI_V2DI_PTEST:
19590 type = int_ftype_v2di_v2di;
19592 case INT64_FTYPE_V4SF:
19593 type = int64_ftype_v4sf;
19595 case INT64_FTYPE_V2DF:
19596 type = int64_ftype_v2df;
19598 case INT_FTYPE_V16QI:
19599 type = int_ftype_v16qi;
19601 case INT_FTYPE_V8QI:
19602 type = int_ftype_v8qi;
19604 case INT_FTYPE_V4SF:
19605 type = int_ftype_v4sf;
19607 case INT_FTYPE_V2DF:
19608 type = int_ftype_v2df;
19610 case V16QI_FTYPE_V16QI:
19611 type = v16qi_ftype_v16qi;
19613 case V8HI_FTYPE_V8HI:
19614 type = v8hi_ftype_v8hi;
19616 case V8HI_FTYPE_V16QI:
19617 type = v8hi_ftype_v16qi;
19619 case V8QI_FTYPE_V8QI:
19620 type = v8qi_ftype_v8qi;
19622 case V4SI_FTYPE_V4SI:
19623 type = v4si_ftype_v4si;
19625 case V4SI_FTYPE_V16QI:
19626 type = v4si_ftype_v16qi;
19628 case V4SI_FTYPE_V8HI:
19629 type = v4si_ftype_v8hi;
19631 case V4SI_FTYPE_V4SF:
19632 type = v4si_ftype_v4sf;
19634 case V4SI_FTYPE_V2DF:
19635 type = v4si_ftype_v2df;
19637 case V4HI_FTYPE_V4HI:
19638 type = v4hi_ftype_v4hi;
19640 case V4SF_FTYPE_V4SF:
19641 case V4SF_FTYPE_V4SF_VEC_MERGE:
19642 type = v4sf_ftype_v4sf;
19644 case V4SF_FTYPE_V4SI:
19645 type = v4sf_ftype_v4si;
19647 case V4SF_FTYPE_V2DF:
19648 type = v4sf_ftype_v2df;
19650 case V2DI_FTYPE_V2DI:
19651 type = v2di_ftype_v2di;
19653 case V2DI_FTYPE_V16QI:
19654 type = v2di_ftype_v16qi;
19656 case V2DI_FTYPE_V8HI:
19657 type = v2di_ftype_v8hi;
19659 case V2DI_FTYPE_V4SI:
19660 type = v2di_ftype_v4si;
19662 case V2SI_FTYPE_V2SI:
19663 type = v2si_ftype_v2si;
19665 case V2SI_FTYPE_V4SF:
19666 type = v2si_ftype_v4sf;
19668 case V2SI_FTYPE_V2DF:
19669 type = v2si_ftype_v2df;
19671 case V2SI_FTYPE_V2SF:
19672 type = v2si_ftype_v2sf;
19674 case V2DF_FTYPE_V4SF:
19675 type = v2df_ftype_v4sf;
19677 case V2DF_FTYPE_V2DF:
19678 case V2DF_FTYPE_V2DF_VEC_MERGE:
19679 type = v2df_ftype_v2df;
19681 case V2DF_FTYPE_V2SI:
19682 type = v2df_ftype_v2si;
19684 case V2DF_FTYPE_V4SI:
19685 type = v2df_ftype_v4si;
19687 case V2SF_FTYPE_V2SF:
19688 type = v2sf_ftype_v2sf;
19690 case V2SF_FTYPE_V2SI:
19691 type = v2sf_ftype_v2si;
19693 case V16QI_FTYPE_V16QI_V16QI:
19694 type = v16qi_ftype_v16qi_v16qi;
19696 case V16QI_FTYPE_V8HI_V8HI:
19697 type = v16qi_ftype_v8hi_v8hi;
19699 case V8QI_FTYPE_V8QI_V8QI:
19700 type = v8qi_ftype_v8qi_v8qi;
19702 case V8QI_FTYPE_V4HI_V4HI:
19703 type = v8qi_ftype_v4hi_v4hi;
19705 case V8HI_FTYPE_V8HI_V8HI:
19706 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19707 type = v8hi_ftype_v8hi_v8hi;
19709 case V8HI_FTYPE_V16QI_V16QI:
19710 type = v8hi_ftype_v16qi_v16qi;
19712 case V8HI_FTYPE_V4SI_V4SI:
19713 type = v8hi_ftype_v4si_v4si;
19715 case V8HI_FTYPE_V8HI_SI_COUNT:
19716 type = v8hi_ftype_v8hi_int;
19718 case V4SI_FTYPE_V4SI_V4SI:
19719 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19720 type = v4si_ftype_v4si_v4si;
19722 case V4SI_FTYPE_V8HI_V8HI:
19723 type = v4si_ftype_v8hi_v8hi;
19725 case V4SI_FTYPE_V4SF_V4SF:
19726 type = v4si_ftype_v4sf_v4sf;
19728 case V4SI_FTYPE_V2DF_V2DF:
19729 type = v4si_ftype_v2df_v2df;
19731 case V4SI_FTYPE_V4SI_SI_COUNT:
19732 type = v4si_ftype_v4si_int;
19734 case V4HI_FTYPE_V4HI_V4HI:
19735 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19736 type = v4hi_ftype_v4hi_v4hi;
19738 case V4HI_FTYPE_V8QI_V8QI:
19739 type = v4hi_ftype_v8qi_v8qi;
19741 case V4HI_FTYPE_V2SI_V2SI:
19742 type = v4hi_ftype_v2si_v2si;
19744 case V4HI_FTYPE_V4HI_SI_COUNT:
19745 type = v4hi_ftype_v4hi_int;
19747 case V4SF_FTYPE_V4SF_V4SF:
19748 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19749 type = v4sf_ftype_v4sf_v4sf;
19751 case V4SF_FTYPE_V4SF_V2SI:
19752 type = v4sf_ftype_v4sf_v2si;
19754 case V4SF_FTYPE_V4SF_V2DF:
19755 type = v4sf_ftype_v4sf_v2df;
19757 case V4SF_FTYPE_V4SF_DI:
19758 type = v4sf_ftype_v4sf_int64;
19760 case V4SF_FTYPE_V4SF_SI:
19761 type = v4sf_ftype_v4sf_int;
19763 case V2DI_FTYPE_V2DI_V2DI:
19764 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19765 type = v2di_ftype_v2di_v2di;
19767 case V2DI_FTYPE_V16QI_V16QI:
19768 type = v2di_ftype_v16qi_v16qi;
19770 case V2DI_FTYPE_V4SI_V4SI:
19771 type = v2di_ftype_v4si_v4si;
19773 case V2DI_FTYPE_V2DI_V16QI:
19774 type = v2di_ftype_v2di_v16qi;
19776 case V2DI_FTYPE_V2DF_V2DF:
19777 type = v2di_ftype_v2df_v2df;
19779 case V2DI_FTYPE_V2DI_SI_COUNT:
19780 type = v2di_ftype_v2di_int;
19782 case V2SI_FTYPE_V2SI_V2SI:
19783 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19784 type = v2si_ftype_v2si_v2si;
19786 case V2SI_FTYPE_V4HI_V4HI:
19787 type = v2si_ftype_v4hi_v4hi;
19789 case V2SI_FTYPE_V2SF_V2SF:
19790 type = v2si_ftype_v2sf_v2sf;
19792 case V2SI_FTYPE_V2SI_SI_COUNT:
19793 type = v2si_ftype_v2si_int;
19795 case V2DF_FTYPE_V2DF_V2DF:
19796 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19797 type = v2df_ftype_v2df_v2df;
19799 case V2DF_FTYPE_V2DF_V4SF:
19800 type = v2df_ftype_v2df_v4sf;
19802 case V2DF_FTYPE_V2DF_DI:
19803 type = v2df_ftype_v2df_int64;
19805 case V2DF_FTYPE_V2DF_SI:
19806 type = v2df_ftype_v2df_int;
19808 case V2SF_FTYPE_V2SF_V2SF:
19809 type = v2sf_ftype_v2sf_v2sf;
19811 case V1DI_FTYPE_V1DI_V1DI:
19812 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19813 type = v1di_ftype_v1di_v1di;
19815 case V1DI_FTYPE_V8QI_V8QI:
19816 type = v1di_ftype_v8qi_v8qi;
19818 case V1DI_FTYPE_V2SI_V2SI:
19819 type = v1di_ftype_v2si_v2si;
19821 case V1DI_FTYPE_V1DI_SI_COUNT:
19822 type = v1di_ftype_v1di_int;
19824 case UINT64_FTYPE_UINT64_UINT64:
19825 type = uint64_ftype_uint64_uint64;
19827 case UINT_FTYPE_UINT_UINT:
19828 type = unsigned_ftype_unsigned_unsigned;
19830 case UINT_FTYPE_UINT_USHORT:
19831 type = unsigned_ftype_unsigned_ushort;
19833 case UINT_FTYPE_UINT_UCHAR:
19834 type = unsigned_ftype_unsigned_uchar;
19836 case V8HI_FTYPE_V8HI_INT:
19837 type = v8hi_ftype_v8hi_int;
19839 case V4SI_FTYPE_V4SI_INT:
19840 type = v4si_ftype_v4si_int;
19842 case V4HI_FTYPE_V4HI_INT:
19843 type = v4hi_ftype_v4hi_int;
19845 case V4SF_FTYPE_V4SF_INT:
19846 type = v4sf_ftype_v4sf_int;
19848 case V2DI_FTYPE_V2DI_INT:
19849 case V2DI2TI_FTYPE_V2DI_INT:
19850 type = v2di_ftype_v2di_int;
19852 case V2DF_FTYPE_V2DF_INT:
19853 type = v2df_ftype_v2df_int;
19855 case V16QI_FTYPE_V16QI_V16QI_V16QI:
19856 type = v16qi_ftype_v16qi_v16qi_v16qi;
19858 case V4SF_FTYPE_V4SF_V4SF_V4SF:
19859 type = v4sf_ftype_v4sf_v4sf_v4sf;
19861 case V2DF_FTYPE_V2DF_V2DF_V2DF:
19862 type = v2df_ftype_v2df_v2df_v2df;
19864 case V16QI_FTYPE_V16QI_V16QI_INT:
19865 type = v16qi_ftype_v16qi_v16qi_int;
19867 case V8HI_FTYPE_V8HI_V8HI_INT:
19868 type = v8hi_ftype_v8hi_v8hi_int;
19870 case V4SI_FTYPE_V4SI_V4SI_INT:
19871 type = v4si_ftype_v4si_v4si_int;
19873 case V4SF_FTYPE_V4SF_V4SF_INT:
19874 type = v4sf_ftype_v4sf_v4sf_int;
19876 case V2DI_FTYPE_V2DI_V2DI_INT:
19877 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
19878 type = v2di_ftype_v2di_v2di_int;
19880 case V2DF_FTYPE_V2DF_V2DF_INT:
19881 type = v2df_ftype_v2df_v2df_int;
19883 case V2DI_FTYPE_V2DI_UINT_UINT:
19884 type = v2di_ftype_v2di_unsigned_unsigned;
19886 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
19887 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
19889 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
19890 type = v1di_ftype_v1di_v1di_int;
19893 gcc_unreachable ();
19896 def_builtin_const (d->mask, d->name, type, d->code);
19899 /* pcmpestr[im] insns. */
19900 for (i = 0, d = bdesc_pcmpestr;
19901 i < ARRAY_SIZE (bdesc_pcmpestr);
19904 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19905 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19907 ftype = int_ftype_v16qi_int_v16qi_int_int;
19908 def_builtin_const (d->mask, d->name, ftype, d->code);
19911 /* pcmpistr[im] insns. */
19912 for (i = 0, d = bdesc_pcmpistr;
19913 i < ARRAY_SIZE (bdesc_pcmpistr);
19916 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19917 ftype = v16qi_ftype_v16qi_v16qi_int;
19919 ftype = int_ftype_v16qi_v16qi_int;
19920 def_builtin_const (d->mask, d->name, ftype, d->code);
19923 /* comi/ucomi insns. */
19924 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19925 if (d->mask == OPTION_MASK_ISA_SSE2)
19926 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19928 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19931 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19932 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19934 /* SSE or 3DNow!A */
19935 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19938 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19940 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19941 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19944 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19945 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19950 /* Define AES built-in functions only if AES is enabled. */
19951 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
19952 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
19953 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
19954 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
19955 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
19956 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
19962 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
19963 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
19966 /* Access to the vec_init patterns. */
19967 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19968 integer_type_node, NULL_TREE);
19969 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19971 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19972 short_integer_type_node,
19973 short_integer_type_node,
19974 short_integer_type_node, NULL_TREE);
19975 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19977 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19978 char_type_node, char_type_node,
19979 char_type_node, char_type_node,
19980 char_type_node, char_type_node,
19981 char_type_node, NULL_TREE);
19982 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19984 /* Access to the vec_extract patterns. */
19985 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19986 integer_type_node, NULL_TREE);
19987 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19989 ftype = build_function_type_list (long_long_integer_type_node,
19990 V2DI_type_node, integer_type_node,
19992 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19994 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19995 integer_type_node, NULL_TREE);
19996 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19998 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19999 integer_type_node, NULL_TREE);
20000 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20002 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20003 integer_type_node, NULL_TREE);
20004 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20006 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20007 integer_type_node, NULL_TREE);
20008 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20010 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20011 integer_type_node, NULL_TREE);
20012 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20014 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20015 integer_type_node, NULL_TREE);
20016 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20018 /* Access to the vec_set patterns. */
20019 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20021 integer_type_node, NULL_TREE);
20022 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20024 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20026 integer_type_node, NULL_TREE);
20027 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20029 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20031 integer_type_node, NULL_TREE);
20032 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20034 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20036 integer_type_node, NULL_TREE);
20037 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20039 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20041 integer_type_node, NULL_TREE);
20042 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20044 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20046 integer_type_node, NULL_TREE);
20047 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20049 /* Add SSE5 multi-arg argument instructions */
20050 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20052 tree mtype = NULL_TREE;
20057 switch ((enum multi_arg_type)d->flag)
20059 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20060 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20061 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20062 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20063 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20064 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20065 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20066 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20067 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20068 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20069 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20070 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20071 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20072 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20073 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20074 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20075 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20076 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20077 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20078 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20079 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20080 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20081 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20082 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20083 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20084 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20085 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20086 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20087 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20088 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20089 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20090 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20091 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20092 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20093 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20094 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20095 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20096 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20097 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20098 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20099 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20100 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20101 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20102 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20103 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20104 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20105 case MULTI_ARG_UNKNOWN:
20107 gcc_unreachable ();
20111 def_builtin_const (d->mask, d->name, mtype, d->code);
20116 ix86_init_builtins (void)
20119 ix86_init_mmx_sse_builtins ();
20122 /* Errors in the source file can cause expand_expr to return const0_rtx
20123 where we expect a vector. To avoid crashing, use one of the vector
20124 clear instructions. */
20126 safe_vector_operand (rtx x, enum machine_mode mode)
20128 if (x == const0_rtx)
20129 x = CONST0_RTX (mode);
20133 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20136 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
20139 tree arg0 = CALL_EXPR_ARG (exp, 0);
20140 tree arg1 = CALL_EXPR_ARG (exp, 1);
20141 rtx op0 = expand_normal (arg0);
20142 rtx op1 = expand_normal (arg1);
20143 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20144 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20145 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20147 if (VECTOR_MODE_P (mode0))
20148 op0 = safe_vector_operand (op0, mode0);
20149 if (VECTOR_MODE_P (mode1))
20150 op1 = safe_vector_operand (op1, mode1);
20152 if (optimize || !target
20153 || GET_MODE (target) != tmode
20154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20155 target = gen_reg_rtx (tmode);
20157 if (GET_MODE (op1) == SImode && mode1 == TImode)
20159 rtx x = gen_reg_rtx (V4SImode);
20160 emit_insn (gen_sse2_loadd (x, op1));
20161 op1 = gen_lowpart (TImode, x);
20164 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20165 op0 = copy_to_mode_reg (mode0, op0);
20166 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20167 op1 = copy_to_mode_reg (mode1, op1);
20169 pat = GEN_FCN (icode) (target, op0, op1);
20178 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20181 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20182 enum multi_arg_type m_type,
20183 enum insn_code sub_code)
20188 bool comparison_p = false;
20190 bool last_arg_constant = false;
20191 int num_memory = 0;
20194 enum machine_mode mode;
20197 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20201 case MULTI_ARG_3_SF:
20202 case MULTI_ARG_3_DF:
20203 case MULTI_ARG_3_DI:
20204 case MULTI_ARG_3_SI:
20205 case MULTI_ARG_3_SI_DI:
20206 case MULTI_ARG_3_HI:
20207 case MULTI_ARG_3_HI_SI:
20208 case MULTI_ARG_3_QI:
20209 case MULTI_ARG_3_PERMPS:
20210 case MULTI_ARG_3_PERMPD:
20214 case MULTI_ARG_2_SF:
20215 case MULTI_ARG_2_DF:
20216 case MULTI_ARG_2_DI:
20217 case MULTI_ARG_2_SI:
20218 case MULTI_ARG_2_HI:
20219 case MULTI_ARG_2_QI:
20223 case MULTI_ARG_2_DI_IMM:
20224 case MULTI_ARG_2_SI_IMM:
20225 case MULTI_ARG_2_HI_IMM:
20226 case MULTI_ARG_2_QI_IMM:
20228 last_arg_constant = true;
20231 case MULTI_ARG_1_SF:
20232 case MULTI_ARG_1_DF:
20233 case MULTI_ARG_1_DI:
20234 case MULTI_ARG_1_SI:
20235 case MULTI_ARG_1_HI:
20236 case MULTI_ARG_1_QI:
20237 case MULTI_ARG_1_SI_DI:
20238 case MULTI_ARG_1_HI_DI:
20239 case MULTI_ARG_1_HI_SI:
20240 case MULTI_ARG_1_QI_DI:
20241 case MULTI_ARG_1_QI_SI:
20242 case MULTI_ARG_1_QI_HI:
20243 case MULTI_ARG_1_PH2PS:
20244 case MULTI_ARG_1_PS2PH:
20248 case MULTI_ARG_2_SF_CMP:
20249 case MULTI_ARG_2_DF_CMP:
20250 case MULTI_ARG_2_DI_CMP:
20251 case MULTI_ARG_2_SI_CMP:
20252 case MULTI_ARG_2_HI_CMP:
20253 case MULTI_ARG_2_QI_CMP:
20255 comparison_p = true;
20258 case MULTI_ARG_2_SF_TF:
20259 case MULTI_ARG_2_DF_TF:
20260 case MULTI_ARG_2_DI_TF:
20261 case MULTI_ARG_2_SI_TF:
20262 case MULTI_ARG_2_HI_TF:
20263 case MULTI_ARG_2_QI_TF:
20268 case MULTI_ARG_UNKNOWN:
20270 gcc_unreachable ();
20273 if (optimize || !target
20274 || GET_MODE (target) != tmode
20275 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20276 target = gen_reg_rtx (tmode);
20278 gcc_assert (nargs <= 4);
20280 for (i = 0; i < nargs; i++)
20282 tree arg = CALL_EXPR_ARG (exp, i);
20283 rtx op = expand_normal (arg);
20284 int adjust = (comparison_p) ? 1 : 0;
20285 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20287 if (last_arg_constant && i == nargs-1)
20289 if (GET_CODE (op) != CONST_INT)
20291 error ("last argument must be an immediate");
20292 return gen_reg_rtx (tmode);
20297 if (VECTOR_MODE_P (mode))
20298 op = safe_vector_operand (op, mode);
20300 /* If we aren't optimizing, only allow one memory operand to be
20302 if (memory_operand (op, mode))
20305 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20308 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20310 op = force_reg (mode, op);
20314 args[i].mode = mode;
20320 pat = GEN_FCN (icode) (target, args[0].op);
20325 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20326 GEN_INT ((int)sub_code));
20327 else if (! comparison_p)
20328 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20331 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20335 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20340 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20344 gcc_unreachable ();
20354 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20355 insns with vec_merge. */
20358 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
20362 tree arg0 = CALL_EXPR_ARG (exp, 0);
20363 rtx op1, op0 = expand_normal (arg0);
20364 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20365 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20367 if (optimize || !target
20368 || GET_MODE (target) != tmode
20369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20370 target = gen_reg_rtx (tmode);
20372 if (VECTOR_MODE_P (mode0))
20373 op0 = safe_vector_operand (op0, mode0);
20375 if ((optimize && !register_operand (op0, mode0))
20376 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20377 op0 = copy_to_mode_reg (mode0, op0);
20380 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20381 op1 = copy_to_mode_reg (mode0, op1);
20383 pat = GEN_FCN (icode) (target, op0, op1);
20390 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20393 ix86_expand_sse_compare (const struct builtin_description *d,
20394 tree exp, rtx target, bool swap)
20397 tree arg0 = CALL_EXPR_ARG (exp, 0);
20398 tree arg1 = CALL_EXPR_ARG (exp, 1);
20399 rtx op0 = expand_normal (arg0);
20400 rtx op1 = expand_normal (arg1);
20402 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20403 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20404 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20405 enum rtx_code comparison = d->comparison;
20407 if (VECTOR_MODE_P (mode0))
20408 op0 = safe_vector_operand (op0, mode0);
20409 if (VECTOR_MODE_P (mode1))
20410 op1 = safe_vector_operand (op1, mode1);
20412 /* Swap operands if we have a comparison that isn't available in
20416 rtx tmp = gen_reg_rtx (mode1);
20417 emit_move_insn (tmp, op1);
20422 if (optimize || !target
20423 || GET_MODE (target) != tmode
20424 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20425 target = gen_reg_rtx (tmode);
20427 if ((optimize && !register_operand (op0, mode0))
20428 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20429 op0 = copy_to_mode_reg (mode0, op0);
20430 if ((optimize && !register_operand (op1, mode1))
20431 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20432 op1 = copy_to_mode_reg (mode1, op1);
20434 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20435 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20442 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20445 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20449 tree arg0 = CALL_EXPR_ARG (exp, 0);
20450 tree arg1 = CALL_EXPR_ARG (exp, 1);
20451 rtx op0 = expand_normal (arg0);
20452 rtx op1 = expand_normal (arg1);
20453 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20454 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20455 enum rtx_code comparison = d->comparison;
20457 if (VECTOR_MODE_P (mode0))
20458 op0 = safe_vector_operand (op0, mode0);
20459 if (VECTOR_MODE_P (mode1))
20460 op1 = safe_vector_operand (op1, mode1);
20462 /* Swap operands if we have a comparison that isn't available in
20464 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20471 target = gen_reg_rtx (SImode);
20472 emit_move_insn (target, const0_rtx);
20473 target = gen_rtx_SUBREG (QImode, target, 0);
20475 if ((optimize && !register_operand (op0, mode0))
20476 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20477 op0 = copy_to_mode_reg (mode0, op0);
20478 if ((optimize && !register_operand (op1, mode1))
20479 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20480 op1 = copy_to_mode_reg (mode1, op1);
20482 pat = GEN_FCN (d->icode) (op0, op1);
20486 emit_insn (gen_rtx_SET (VOIDmode,
20487 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20488 gen_rtx_fmt_ee (comparison, QImode,
20492 return SUBREG_REG (target);
20495 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20498 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20502 tree arg0 = CALL_EXPR_ARG (exp, 0);
20503 tree arg1 = CALL_EXPR_ARG (exp, 1);
20504 rtx op0 = expand_normal (arg0);
20505 rtx op1 = expand_normal (arg1);
20506 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20507 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20508 enum rtx_code comparison = d->comparison;
20510 if (VECTOR_MODE_P (mode0))
20511 op0 = safe_vector_operand (op0, mode0);
20512 if (VECTOR_MODE_P (mode1))
20513 op1 = safe_vector_operand (op1, mode1);
20515 target = gen_reg_rtx (SImode);
20516 emit_move_insn (target, const0_rtx);
20517 target = gen_rtx_SUBREG (QImode, target, 0);
20519 if ((optimize && !register_operand (op0, mode0))
20520 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20521 op0 = copy_to_mode_reg (mode0, op0);
20522 if ((optimize && !register_operand (op1, mode1))
20523 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20524 op1 = copy_to_mode_reg (mode1, op1);
20526 pat = GEN_FCN (d->icode) (op0, op1);
20530 emit_insn (gen_rtx_SET (VOIDmode,
20531 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20532 gen_rtx_fmt_ee (comparison, QImode,
20536 return SUBREG_REG (target);
20539 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20542 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20543 tree exp, rtx target)
20546 tree arg0 = CALL_EXPR_ARG (exp, 0);
20547 tree arg1 = CALL_EXPR_ARG (exp, 1);
20548 tree arg2 = CALL_EXPR_ARG (exp, 2);
20549 tree arg3 = CALL_EXPR_ARG (exp, 3);
20550 tree arg4 = CALL_EXPR_ARG (exp, 4);
20551 rtx scratch0, scratch1;
20552 rtx op0 = expand_normal (arg0);
20553 rtx op1 = expand_normal (arg1);
20554 rtx op2 = expand_normal (arg2);
20555 rtx op3 = expand_normal (arg3);
20556 rtx op4 = expand_normal (arg4);
20557 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20559 tmode0 = insn_data[d->icode].operand[0].mode;
20560 tmode1 = insn_data[d->icode].operand[1].mode;
20561 modev2 = insn_data[d->icode].operand[2].mode;
20562 modei3 = insn_data[d->icode].operand[3].mode;
20563 modev4 = insn_data[d->icode].operand[4].mode;
20564 modei5 = insn_data[d->icode].operand[5].mode;
20565 modeimm = insn_data[d->icode].operand[6].mode;
20567 if (VECTOR_MODE_P (modev2))
20568 op0 = safe_vector_operand (op0, modev2);
20569 if (VECTOR_MODE_P (modev4))
20570 op2 = safe_vector_operand (op2, modev4);
20572 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20573 op0 = copy_to_mode_reg (modev2, op0);
20574 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20575 op1 = copy_to_mode_reg (modei3, op1);
20576 if ((optimize && !register_operand (op2, modev4))
20577 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20578 op2 = copy_to_mode_reg (modev4, op2);
20579 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20580 op3 = copy_to_mode_reg (modei5, op3);
20582 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20584 error ("the fifth argument must be a 8-bit immediate");
20588 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20590 if (optimize || !target
20591 || GET_MODE (target) != tmode0
20592 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20593 target = gen_reg_rtx (tmode0);
20595 scratch1 = gen_reg_rtx (tmode1);
20597 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20599 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20601 if (optimize || !target
20602 || GET_MODE (target) != tmode1
20603 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20604 target = gen_reg_rtx (tmode1);
20606 scratch0 = gen_reg_rtx (tmode0);
20608 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20612 gcc_assert (d->flag);
20614 scratch0 = gen_reg_rtx (tmode0);
20615 scratch1 = gen_reg_rtx (tmode1);
20617 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20627 target = gen_reg_rtx (SImode);
20628 emit_move_insn (target, const0_rtx);
20629 target = gen_rtx_SUBREG (QImode, target, 0);
20632 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20633 gen_rtx_fmt_ee (EQ, QImode,
20634 gen_rtx_REG ((enum machine_mode) d->flag,
20637 return SUBREG_REG (target);
20644 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20647 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20648 tree exp, rtx target)
20651 tree arg0 = CALL_EXPR_ARG (exp, 0);
20652 tree arg1 = CALL_EXPR_ARG (exp, 1);
20653 tree arg2 = CALL_EXPR_ARG (exp, 2);
20654 rtx scratch0, scratch1;
20655 rtx op0 = expand_normal (arg0);
20656 rtx op1 = expand_normal (arg1);
20657 rtx op2 = expand_normal (arg2);
20658 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20660 tmode0 = insn_data[d->icode].operand[0].mode;
20661 tmode1 = insn_data[d->icode].operand[1].mode;
20662 modev2 = insn_data[d->icode].operand[2].mode;
20663 modev3 = insn_data[d->icode].operand[3].mode;
20664 modeimm = insn_data[d->icode].operand[4].mode;
20666 if (VECTOR_MODE_P (modev2))
20667 op0 = safe_vector_operand (op0, modev2);
20668 if (VECTOR_MODE_P (modev3))
20669 op1 = safe_vector_operand (op1, modev3);
20671 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20672 op0 = copy_to_mode_reg (modev2, op0);
20673 if ((optimize && !register_operand (op1, modev3))
20674 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20675 op1 = copy_to_mode_reg (modev3, op1);
20677 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20679 error ("the third argument must be a 8-bit immediate");
20683 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20685 if (optimize || !target
20686 || GET_MODE (target) != tmode0
20687 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20688 target = gen_reg_rtx (tmode0);
20690 scratch1 = gen_reg_rtx (tmode1);
20692 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20694 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20696 if (optimize || !target
20697 || GET_MODE (target) != tmode1
20698 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20699 target = gen_reg_rtx (tmode1);
20701 scratch0 = gen_reg_rtx (tmode0);
20703 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20707 gcc_assert (d->flag);
20709 scratch0 = gen_reg_rtx (tmode0);
20710 scratch1 = gen_reg_rtx (tmode1);
20712 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20722 target = gen_reg_rtx (SImode);
20723 emit_move_insn (target, const0_rtx);
20724 target = gen_rtx_SUBREG (QImode, target, 0);
20727 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20728 gen_rtx_fmt_ee (EQ, QImode,
20729 gen_rtx_REG ((enum machine_mode) d->flag,
20732 return SUBREG_REG (target);
20738 /* Subroutine of ix86_expand_builtin to take care of insns with
20739 variable number of operands. */
20742 ix86_expand_args_builtin (const struct builtin_description *d,
20743 tree exp, rtx target)
20745 rtx pat, real_target;
20746 unsigned int i, nargs;
20747 unsigned int nargs_constant = 0;
20748 int num_memory = 0;
20752 enum machine_mode mode;
20754 bool last_arg_count = false;
20755 enum insn_code icode = d->icode;
20756 const struct insn_data *insn_p = &insn_data[icode];
20757 enum machine_mode tmode = insn_p->operand[0].mode;
20758 enum machine_mode rmode = VOIDmode;
20760 enum rtx_code comparison = d->comparison;
20762 switch ((enum ix86_builtin_type) d->flag)
20764 case INT_FTYPE_V2DI_V2DI_PTEST:
20765 return ix86_expand_sse_ptest (d, exp, target);
20766 case FLOAT128_FTYPE_FLOAT128:
20767 case FLOAT_FTYPE_FLOAT:
20768 case INT64_FTYPE_V4SF:
20769 case INT64_FTYPE_V2DF:
20770 case INT_FTYPE_V16QI:
20771 case INT_FTYPE_V8QI:
20772 case INT_FTYPE_V4SF:
20773 case INT_FTYPE_V2DF:
20774 case V16QI_FTYPE_V16QI:
20775 case V8HI_FTYPE_V8HI:
20776 case V8HI_FTYPE_V16QI:
20777 case V8QI_FTYPE_V8QI:
20778 case V4SI_FTYPE_V4SI:
20779 case V4SI_FTYPE_V16QI:
20780 case V4SI_FTYPE_V4SF:
20781 case V4SI_FTYPE_V8HI:
20782 case V4SI_FTYPE_V2DF:
20783 case V4HI_FTYPE_V4HI:
20784 case V4SF_FTYPE_V4SF:
20785 case V4SF_FTYPE_V4SI:
20786 case V4SF_FTYPE_V2DF:
20787 case V2DI_FTYPE_V2DI:
20788 case V2DI_FTYPE_V16QI:
20789 case V2DI_FTYPE_V8HI:
20790 case V2DI_FTYPE_V4SI:
20791 case V2DF_FTYPE_V2DF:
20792 case V2DF_FTYPE_V4SI:
20793 case V2DF_FTYPE_V4SF:
20794 case V2DF_FTYPE_V2SI:
20795 case V2SI_FTYPE_V2SI:
20796 case V2SI_FTYPE_V4SF:
20797 case V2SI_FTYPE_V2SF:
20798 case V2SI_FTYPE_V2DF:
20799 case V2SF_FTYPE_V2SF:
20800 case V2SF_FTYPE_V2SI:
20803 case V4SF_FTYPE_V4SF_VEC_MERGE:
20804 case V2DF_FTYPE_V2DF_VEC_MERGE:
20805 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
20806 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20807 case V16QI_FTYPE_V16QI_V16QI:
20808 case V16QI_FTYPE_V8HI_V8HI:
20809 case V8QI_FTYPE_V8QI_V8QI:
20810 case V8QI_FTYPE_V4HI_V4HI:
20811 case V8HI_FTYPE_V8HI_V8HI:
20812 case V8HI_FTYPE_V16QI_V16QI:
20813 case V8HI_FTYPE_V4SI_V4SI:
20814 case V4SI_FTYPE_V4SI_V4SI:
20815 case V4SI_FTYPE_V8HI_V8HI:
20816 case V4SI_FTYPE_V4SF_V4SF:
20817 case V4SI_FTYPE_V2DF_V2DF:
20818 case V4HI_FTYPE_V4HI_V4HI:
20819 case V4HI_FTYPE_V8QI_V8QI:
20820 case V4HI_FTYPE_V2SI_V2SI:
20821 case V4SF_FTYPE_V4SF_V4SF:
20822 case V4SF_FTYPE_V4SF_V2SI:
20823 case V4SF_FTYPE_V4SF_V2DF:
20824 case V4SF_FTYPE_V4SF_DI:
20825 case V4SF_FTYPE_V4SF_SI:
20826 case V2DI_FTYPE_V2DI_V2DI:
20827 case V2DI_FTYPE_V16QI_V16QI:
20828 case V2DI_FTYPE_V4SI_V4SI:
20829 case V2DI_FTYPE_V2DI_V16QI:
20830 case V2DI_FTYPE_V2DF_V2DF:
20831 case V2SI_FTYPE_V2SI_V2SI:
20832 case V2SI_FTYPE_V4HI_V4HI:
20833 case V2SI_FTYPE_V2SF_V2SF:
20834 case V2DF_FTYPE_V2DF_V2DF:
20835 case V2DF_FTYPE_V2DF_V4SF:
20836 case V2DF_FTYPE_V2DF_DI:
20837 case V2DF_FTYPE_V2DF_SI:
20838 case V2SF_FTYPE_V2SF_V2SF:
20839 case V1DI_FTYPE_V1DI_V1DI:
20840 case V1DI_FTYPE_V8QI_V8QI:
20841 case V1DI_FTYPE_V2SI_V2SI:
20842 if (comparison == UNKNOWN)
20843 return ix86_expand_binop_builtin (icode, exp, target);
20846 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20847 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20848 gcc_assert (comparison != UNKNOWN);
20852 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20853 case V8HI_FTYPE_V8HI_SI_COUNT:
20854 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20855 case V4SI_FTYPE_V4SI_SI_COUNT:
20856 case V4HI_FTYPE_V4HI_V4HI_COUNT:
20857 case V4HI_FTYPE_V4HI_SI_COUNT:
20858 case V2DI_FTYPE_V2DI_V2DI_COUNT:
20859 case V2DI_FTYPE_V2DI_SI_COUNT:
20860 case V2SI_FTYPE_V2SI_V2SI_COUNT:
20861 case V2SI_FTYPE_V2SI_SI_COUNT:
20862 case V1DI_FTYPE_V1DI_V1DI_COUNT:
20863 case V1DI_FTYPE_V1DI_SI_COUNT:
20865 last_arg_count = true;
20867 case UINT64_FTYPE_UINT64_UINT64:
20868 case UINT_FTYPE_UINT_UINT:
20869 case UINT_FTYPE_UINT_USHORT:
20870 case UINT_FTYPE_UINT_UCHAR:
20873 case V2DI2TI_FTYPE_V2DI_INT:
20876 nargs_constant = 1;
20878 case V8HI_FTYPE_V8HI_INT:
20879 case V4SI_FTYPE_V4SI_INT:
20880 case V4HI_FTYPE_V4HI_INT:
20881 case V4SF_FTYPE_V4SF_INT:
20882 case V2DI_FTYPE_V2DI_INT:
20883 case V2DF_FTYPE_V2DF_INT:
20885 nargs_constant = 1;
20887 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20888 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20889 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20892 case V16QI_FTYPE_V16QI_V16QI_INT:
20893 case V8HI_FTYPE_V8HI_V8HI_INT:
20894 case V4SI_FTYPE_V4SI_V4SI_INT:
20895 case V4SF_FTYPE_V4SF_V4SF_INT:
20896 case V2DI_FTYPE_V2DI_V2DI_INT:
20897 case V2DF_FTYPE_V2DF_V2DF_INT:
20899 nargs_constant = 1;
20901 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20904 nargs_constant = 1;
20906 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20909 nargs_constant = 1;
20911 case V2DI_FTYPE_V2DI_UINT_UINT:
20913 nargs_constant = 2;
20915 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20917 nargs_constant = 2;
20920 gcc_unreachable ();
20923 gcc_assert (nargs <= ARRAY_SIZE (args));
20925 if (comparison != UNKNOWN)
20927 gcc_assert (nargs == 2);
20928 return ix86_expand_sse_compare (d, exp, target, swap);
20931 if (rmode == VOIDmode || rmode == tmode)
20935 || GET_MODE (target) != tmode
20936 || ! (*insn_p->operand[0].predicate) (target, tmode))
20937 target = gen_reg_rtx (tmode);
20938 real_target = target;
20942 target = gen_reg_rtx (rmode);
20943 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
20946 for (i = 0; i < nargs; i++)
20948 tree arg = CALL_EXPR_ARG (exp, i);
20949 rtx op = expand_normal (arg);
20950 enum machine_mode mode = insn_p->operand[i + 1].mode;
20951 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
20953 if (last_arg_count && (i + 1) == nargs)
20955 /* SIMD shift insns take either an 8-bit immediate or
20956 register as count. But builtin functions take int as
20957 count. If count doesn't match, we put it in register. */
20960 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
20961 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
20962 op = copy_to_reg (op);
20965 else if ((nargs - i) <= nargs_constant)
20970 case CODE_FOR_sse4_1_roundpd:
20971 case CODE_FOR_sse4_1_roundps:
20972 case CODE_FOR_sse4_1_roundsd:
20973 case CODE_FOR_sse4_1_roundss:
20974 case CODE_FOR_sse4_1_blendps:
20975 error ("the last argument must be a 4-bit immediate");
20978 case CODE_FOR_sse4_1_blendpd:
20979 error ("the last argument must be a 2-bit immediate");
20983 switch (nargs_constant)
20986 if ((nargs - i) == nargs_constant)
20988 error ("the next to last argument must be an 8-bit immediate");
20992 error ("the last argument must be an 8-bit immediate");
20995 gcc_unreachable ();
21002 if (VECTOR_MODE_P (mode))
21003 op = safe_vector_operand (op, mode);
21005 /* If we aren't optimizing, only allow one memory operand to
21007 if (memory_operand (op, mode))
21010 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21012 if (optimize || !match || num_memory > 1)
21013 op = copy_to_mode_reg (mode, op);
21017 op = copy_to_reg (op);
21018 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21023 args[i].mode = mode;
21029 pat = GEN_FCN (icode) (real_target, args[0].op);
21032 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21035 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21039 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21040 args[2].op, args[3].op);
21043 gcc_unreachable ();
21053 /* Subroutine of ix86_expand_builtin to take care of special insns
21054 with variable number of operands. */
21057 ix86_expand_special_args_builtin (const struct builtin_description *d,
21058 tree exp, rtx target)
21062 unsigned int i, nargs, arg_adjust, memory;
21066 enum machine_mode mode;
21068 enum insn_code icode = d->icode;
21069 bool last_arg_constant = false;
21070 const struct insn_data *insn_p = &insn_data[icode];
21071 enum machine_mode tmode = insn_p->operand[0].mode;
21072 enum { load, store } class;
21074 switch ((enum ix86_special_builtin_type) d->flag)
21076 case VOID_FTYPE_VOID:
21077 emit_insn (GEN_FCN (icode) (target));
21079 case V2DI_FTYPE_PV2DI:
21080 case V16QI_FTYPE_PCCHAR:
21081 case V4SF_FTYPE_PCFLOAT:
21082 case V2DF_FTYPE_PCDOUBLE:
21087 case VOID_FTYPE_PV2SF_V4SF:
21088 case VOID_FTYPE_PV2DI_V2DI:
21089 case VOID_FTYPE_PCHAR_V16QI:
21090 case VOID_FTYPE_PFLOAT_V4SF:
21091 case VOID_FTYPE_PDOUBLE_V2DF:
21092 case VOID_FTYPE_PDI_DI:
21093 case VOID_FTYPE_PINT_INT:
21096 /* Reserve memory operand for target. */
21097 memory = ARRAY_SIZE (args);
21099 case V4SF_FTYPE_V4SF_PCV2SF:
21100 case V2DF_FTYPE_V2DF_PCDOUBLE:
21106 gcc_unreachable ();
21109 gcc_assert (nargs <= ARRAY_SIZE (args));
21111 if (class == store)
21113 arg = CALL_EXPR_ARG (exp, 0);
21114 op = expand_normal (arg);
21115 gcc_assert (target == 0);
21116 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21124 || GET_MODE (target) != tmode
21125 || ! (*insn_p->operand[0].predicate) (target, tmode))
21126 target = gen_reg_rtx (tmode);
21129 for (i = 0; i < nargs; i++)
21131 enum machine_mode mode = insn_p->operand[i + 1].mode;
21134 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21135 op = expand_normal (arg);
21136 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21138 if (last_arg_constant && (i + 1) == nargs)
21144 error ("the last argument must be an 8-bit immediate");
21152 /* This must be the memory operand. */
21153 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21154 gcc_assert (GET_MODE (op) == mode
21155 || GET_MODE (op) == VOIDmode);
21159 /* This must be register. */
21160 if (VECTOR_MODE_P (mode))
21161 op = safe_vector_operand (op, mode);
21163 gcc_assert (GET_MODE (op) == mode
21164 || GET_MODE (op) == VOIDmode);
21165 op = copy_to_mode_reg (mode, op);
21170 args[i].mode = mode;
21176 pat = GEN_FCN (icode) (target, args[0].op);
21179 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21182 gcc_unreachable ();
21188 return class == store ? 0 : target;
21191 /* Return the integer constant in ARG. Constrain it to be in the range
21192 of the subparts of VEC_TYPE; issue an error if not. */
21195 get_element_number (tree vec_type, tree arg)
21197 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
21199 if (!host_integerp (arg, 1)
21200 || (elt = tree_low_cst (arg, 1), elt > max))
21202 error ("selector must be an integer constant in the range 0..%wi", max);
21209 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21210 ix86_expand_vector_init. We DO have language-level syntax for this, in
21211 the form of (type){ init-list }. Except that since we can't place emms
21212 instructions from inside the compiler, we can't allow the use of MMX
21213 registers unless the user explicitly asks for it. So we do *not* define
21214 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21215 we have builtins invoked by mmintrin.h that gives us license to emit
21216 these sorts of instructions. */
21219 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21221 enum machine_mode tmode = TYPE_MODE (type);
21222 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21223 int i, n_elt = GET_MODE_NUNITS (tmode);
21224 rtvec v = rtvec_alloc (n_elt);
21226 gcc_assert (VECTOR_MODE_P (tmode));
21227 gcc_assert (call_expr_nargs (exp) == n_elt);
21229 for (i = 0; i < n_elt; ++i)
21231 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21232 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21235 if (!target || !register_operand (target, tmode))
21236 target = gen_reg_rtx (tmode);
21238 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21242 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21243 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21244 had a language-level syntax for referencing vector elements. */
21247 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21249 enum machine_mode tmode, mode0;
21254 arg0 = CALL_EXPR_ARG (exp, 0);
21255 arg1 = CALL_EXPR_ARG (exp, 1);
21257 op0 = expand_normal (arg0);
21258 elt = get_element_number (TREE_TYPE (arg0), arg1);
21260 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21261 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21262 gcc_assert (VECTOR_MODE_P (mode0));
21264 op0 = force_reg (mode0, op0);
21266 if (optimize || !target || !register_operand (target, tmode))
21267 target = gen_reg_rtx (tmode);
21269 ix86_expand_vector_extract (true, target, op0, elt);
21274 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21275 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21276 a language-level syntax for referencing vector elements. */
21279 ix86_expand_vec_set_builtin (tree exp)
21281 enum machine_mode tmode, mode1;
21282 tree arg0, arg1, arg2;
21284 rtx op0, op1, target;
21286 arg0 = CALL_EXPR_ARG (exp, 0);
21287 arg1 = CALL_EXPR_ARG (exp, 1);
21288 arg2 = CALL_EXPR_ARG (exp, 2);
21290 tmode = TYPE_MODE (TREE_TYPE (arg0));
21291 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21292 gcc_assert (VECTOR_MODE_P (tmode));
21294 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21295 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
21296 elt = get_element_number (TREE_TYPE (arg0), arg2);
21298 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21299 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21301 op0 = force_reg (tmode, op0);
21302 op1 = force_reg (mode1, op1);
21304 /* OP0 is the source of these builtin functions and shouldn't be
21305 modified. Create a copy, use it and return it as target. */
21306 target = gen_reg_rtx (tmode);
21307 emit_move_insn (target, op0);
21308 ix86_expand_vector_set (true, target, op1, elt);
21313 /* Expand an expression EXP that calls a built-in function,
21314 with result going to TARGET if that's convenient
21315 (and in mode MODE if that's convenient).
21316 SUBTARGET may be used as the target for computing one of EXP's operands.
21317 IGNORE is nonzero if the value is to be ignored. */
21320 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21321 enum machine_mode mode ATTRIBUTE_UNUSED,
21322 int ignore ATTRIBUTE_UNUSED)
21324 const struct builtin_description *d;
21326 enum insn_code icode;
21327 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21328 tree arg0, arg1, arg2;
21329 rtx op0, op1, op2, pat;
21330 enum machine_mode mode0, mode1, mode2;
21331 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21335 case IX86_BUILTIN_MASKMOVQ:
21336 case IX86_BUILTIN_MASKMOVDQU:
21337 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21338 ? CODE_FOR_mmx_maskmovq
21339 : CODE_FOR_sse2_maskmovdqu);
21340 /* Note the arg order is different from the operand order. */
21341 arg1 = CALL_EXPR_ARG (exp, 0);
21342 arg2 = CALL_EXPR_ARG (exp, 1);
21343 arg0 = CALL_EXPR_ARG (exp, 2);
21344 op0 = expand_normal (arg0);
21345 op1 = expand_normal (arg1);
21346 op2 = expand_normal (arg2);
21347 mode0 = insn_data[icode].operand[0].mode;
21348 mode1 = insn_data[icode].operand[1].mode;
21349 mode2 = insn_data[icode].operand[2].mode;
21351 op0 = force_reg (Pmode, op0);
21352 op0 = gen_rtx_MEM (mode1, op0);
21354 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21355 op0 = copy_to_mode_reg (mode0, op0);
21356 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21357 op1 = copy_to_mode_reg (mode1, op1);
21358 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21359 op2 = copy_to_mode_reg (mode2, op2);
21360 pat = GEN_FCN (icode) (op0, op1, op2);
21366 case IX86_BUILTIN_LDMXCSR:
21367 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21368 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21369 emit_move_insn (target, op0);
21370 emit_insn (gen_sse_ldmxcsr (target));
21373 case IX86_BUILTIN_STMXCSR:
21374 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21375 emit_insn (gen_sse_stmxcsr (target));
21376 return copy_to_mode_reg (SImode, target);
21378 case IX86_BUILTIN_CLFLUSH:
21379 arg0 = CALL_EXPR_ARG (exp, 0);
21380 op0 = expand_normal (arg0);
21381 icode = CODE_FOR_sse2_clflush;
21382 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21383 op0 = copy_to_mode_reg (Pmode, op0);
21385 emit_insn (gen_sse2_clflush (op0));
21388 case IX86_BUILTIN_MONITOR:
21389 arg0 = CALL_EXPR_ARG (exp, 0);
21390 arg1 = CALL_EXPR_ARG (exp, 1);
21391 arg2 = CALL_EXPR_ARG (exp, 2);
21392 op0 = expand_normal (arg0);
21393 op1 = expand_normal (arg1);
21394 op2 = expand_normal (arg2);
21396 op0 = copy_to_mode_reg (Pmode, op0);
21398 op1 = copy_to_mode_reg (SImode, op1);
21400 op2 = copy_to_mode_reg (SImode, op2);
21402 emit_insn (gen_sse3_monitor (op0, op1, op2));
21404 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21407 case IX86_BUILTIN_MWAIT:
21408 arg0 = CALL_EXPR_ARG (exp, 0);
21409 arg1 = CALL_EXPR_ARG (exp, 1);
21410 op0 = expand_normal (arg0);
21411 op1 = expand_normal (arg1);
21413 op0 = copy_to_mode_reg (SImode, op0);
21415 op1 = copy_to_mode_reg (SImode, op1);
21416 emit_insn (gen_sse3_mwait (op0, op1));
21419 case IX86_BUILTIN_VEC_INIT_V2SI:
21420 case IX86_BUILTIN_VEC_INIT_V4HI:
21421 case IX86_BUILTIN_VEC_INIT_V8QI:
21422 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21424 case IX86_BUILTIN_VEC_EXT_V2DF:
21425 case IX86_BUILTIN_VEC_EXT_V2DI:
21426 case IX86_BUILTIN_VEC_EXT_V4SF:
21427 case IX86_BUILTIN_VEC_EXT_V4SI:
21428 case IX86_BUILTIN_VEC_EXT_V8HI:
21429 case IX86_BUILTIN_VEC_EXT_V2SI:
21430 case IX86_BUILTIN_VEC_EXT_V4HI:
21431 case IX86_BUILTIN_VEC_EXT_V16QI:
21432 return ix86_expand_vec_ext_builtin (exp, target);
21434 case IX86_BUILTIN_VEC_SET_V2DI:
21435 case IX86_BUILTIN_VEC_SET_V4SF:
21436 case IX86_BUILTIN_VEC_SET_V4SI:
21437 case IX86_BUILTIN_VEC_SET_V8HI:
21438 case IX86_BUILTIN_VEC_SET_V4HI:
21439 case IX86_BUILTIN_VEC_SET_V16QI:
21440 return ix86_expand_vec_set_builtin (exp);
21442 case IX86_BUILTIN_INFQ:
21444 REAL_VALUE_TYPE inf;
21448 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21450 tmp = validize_mem (force_const_mem (mode, tmp));
21453 target = gen_reg_rtx (mode);
21455 emit_move_insn (target, tmp);
21463 for (i = 0, d = bdesc_special_args;
21464 i < ARRAY_SIZE (bdesc_special_args);
21466 if (d->code == fcode)
21467 return ix86_expand_special_args_builtin (d, exp, target);
21469 for (i = 0, d = bdesc_args;
21470 i < ARRAY_SIZE (bdesc_args);
21472 if (d->code == fcode)
21473 return ix86_expand_args_builtin (d, exp, target);
21475 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21476 if (d->code == fcode)
21477 return ix86_expand_sse_comi (d, exp, target);
21479 for (i = 0, d = bdesc_pcmpestr;
21480 i < ARRAY_SIZE (bdesc_pcmpestr);
21482 if (d->code == fcode)
21483 return ix86_expand_sse_pcmpestr (d, exp, target);
21485 for (i = 0, d = bdesc_pcmpistr;
21486 i < ARRAY_SIZE (bdesc_pcmpistr);
21488 if (d->code == fcode)
21489 return ix86_expand_sse_pcmpistr (d, exp, target);
21491 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21492 if (d->code == fcode)
21493 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21494 (enum multi_arg_type)d->flag,
21497 gcc_unreachable ();
21500 /* Returns a function decl for a vectorized version of the builtin function
21501 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21502 if it is not available. */
21505 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21508 enum machine_mode in_mode, out_mode;
21511 if (TREE_CODE (type_out) != VECTOR_TYPE
21512 || TREE_CODE (type_in) != VECTOR_TYPE)
21515 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21516 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21517 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21518 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21522 case BUILT_IN_SQRT:
21523 if (out_mode == DFmode && out_n == 2
21524 && in_mode == DFmode && in_n == 2)
21525 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21528 case BUILT_IN_SQRTF:
21529 if (out_mode == SFmode && out_n == 4
21530 && in_mode == SFmode && in_n == 4)
21531 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21534 case BUILT_IN_LRINT:
21535 if (out_mode == SImode && out_n == 4
21536 && in_mode == DFmode && in_n == 2)
21537 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21540 case BUILT_IN_LRINTF:
21541 if (out_mode == SImode && out_n == 4
21542 && in_mode == SFmode && in_n == 4)
21543 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21550 /* Dispatch to a handler for a vectorization library. */
21551 if (ix86_veclib_handler)
21552 return (*ix86_veclib_handler)(fn, type_out, type_in);
21557 /* Handler for an SVML-style interface to
21558 a library with vectorized intrinsics. */
21561 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21564 tree fntype, new_fndecl, args;
21567 enum machine_mode el_mode, in_mode;
21570 /* The SVML is suitable for unsafe math only. */
21571 if (!flag_unsafe_math_optimizations)
21574 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21575 n = TYPE_VECTOR_SUBPARTS (type_out);
21576 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21577 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21578 if (el_mode != in_mode
21586 case BUILT_IN_LOG10:
21588 case BUILT_IN_TANH:
21590 case BUILT_IN_ATAN:
21591 case BUILT_IN_ATAN2:
21592 case BUILT_IN_ATANH:
21593 case BUILT_IN_CBRT:
21594 case BUILT_IN_SINH:
21596 case BUILT_IN_ASINH:
21597 case BUILT_IN_ASIN:
21598 case BUILT_IN_COSH:
21600 case BUILT_IN_ACOSH:
21601 case BUILT_IN_ACOS:
21602 if (el_mode != DFmode || n != 2)
21606 case BUILT_IN_EXPF:
21607 case BUILT_IN_LOGF:
21608 case BUILT_IN_LOG10F:
21609 case BUILT_IN_POWF:
21610 case BUILT_IN_TANHF:
21611 case BUILT_IN_TANF:
21612 case BUILT_IN_ATANF:
21613 case BUILT_IN_ATAN2F:
21614 case BUILT_IN_ATANHF:
21615 case BUILT_IN_CBRTF:
21616 case BUILT_IN_SINHF:
21617 case BUILT_IN_SINF:
21618 case BUILT_IN_ASINHF:
21619 case BUILT_IN_ASINF:
21620 case BUILT_IN_COSHF:
21621 case BUILT_IN_COSF:
21622 case BUILT_IN_ACOSHF:
21623 case BUILT_IN_ACOSF:
21624 if (el_mode != SFmode || n != 4)
21632 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21634 if (fn == BUILT_IN_LOGF)
21635 strcpy (name, "vmlsLn4");
21636 else if (fn == BUILT_IN_LOG)
21637 strcpy (name, "vmldLn2");
21640 sprintf (name, "vmls%s", bname+10);
21641 name[strlen (name)-1] = '4';
21644 sprintf (name, "vmld%s2", bname+10);
21646 /* Convert to uppercase. */
21650 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21651 args = TREE_CHAIN (args))
21655 fntype = build_function_type_list (type_out, type_in, NULL);
21657 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21659 /* Build a function declaration for the vectorized function. */
21660 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21661 TREE_PUBLIC (new_fndecl) = 1;
21662 DECL_EXTERNAL (new_fndecl) = 1;
21663 DECL_IS_NOVOPS (new_fndecl) = 1;
21664 TREE_READONLY (new_fndecl) = 1;
21669 /* Handler for an ACML-style interface to
21670 a library with vectorized intrinsics. */
21673 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21675 char name[20] = "__vr.._";
21676 tree fntype, new_fndecl, args;
21679 enum machine_mode el_mode, in_mode;
21682 /* The ACML is 64bits only and suitable for unsafe math only as
21683 it does not correctly support parts of IEEE with the required
21684 precision such as denormals. */
21686 || !flag_unsafe_math_optimizations)
21689 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21690 n = TYPE_VECTOR_SUBPARTS (type_out);
21691 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21692 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21693 if (el_mode != in_mode
21703 case BUILT_IN_LOG2:
21704 case BUILT_IN_LOG10:
21707 if (el_mode != DFmode
21712 case BUILT_IN_SINF:
21713 case BUILT_IN_COSF:
21714 case BUILT_IN_EXPF:
21715 case BUILT_IN_POWF:
21716 case BUILT_IN_LOGF:
21717 case BUILT_IN_LOG2F:
21718 case BUILT_IN_LOG10F:
21721 if (el_mode != SFmode
21730 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21731 sprintf (name + 7, "%s", bname+10);
21734 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21735 args = TREE_CHAIN (args))
21739 fntype = build_function_type_list (type_out, type_in, NULL);
21741 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21743 /* Build a function declaration for the vectorized function. */
21744 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21745 TREE_PUBLIC (new_fndecl) = 1;
21746 DECL_EXTERNAL (new_fndecl) = 1;
21747 DECL_IS_NOVOPS (new_fndecl) = 1;
21748 TREE_READONLY (new_fndecl) = 1;
21754 /* Returns a decl of a function that implements conversion of the
21755 input vector of type TYPE, or NULL_TREE if it is not available. */
21758 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21760 if (TREE_CODE (type) != VECTOR_TYPE)
21766 switch (TYPE_MODE (type))
21769 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21774 case FIX_TRUNC_EXPR:
21775 switch (TYPE_MODE (type))
21778 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21788 /* Returns a code for a target-specific builtin that implements
21789 reciprocal of the function, or NULL_TREE if not available. */
21792 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21793 bool sqrt ATTRIBUTE_UNUSED)
21795 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21796 && flag_finite_math_only && !flag_trapping_math
21797 && flag_unsafe_math_optimizations))
21801 /* Machine dependent builtins. */
21804 /* Vectorized version of sqrt to rsqrt conversion. */
21805 case IX86_BUILTIN_SQRTPS_NR:
21806 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21812 /* Normal builtins. */
21815 /* Sqrt to rsqrt conversion. */
21816 case BUILT_IN_SQRTF:
21817 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21824 /* Store OPERAND to the memory after reload is completed. This means
21825 that we can't easily use assign_stack_local. */
21827 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21831 gcc_assert (reload_completed);
21832 if (TARGET_RED_ZONE)
21834 result = gen_rtx_MEM (mode,
21835 gen_rtx_PLUS (Pmode,
21837 GEN_INT (-RED_ZONE_SIZE)));
21838 emit_move_insn (result, operand);
21840 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21846 operand = gen_lowpart (DImode, operand);
21850 gen_rtx_SET (VOIDmode,
21851 gen_rtx_MEM (DImode,
21852 gen_rtx_PRE_DEC (DImode,
21853 stack_pointer_rtx)),
21857 gcc_unreachable ();
21859 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21868 split_di (&operand, 1, operands, operands + 1);
21870 gen_rtx_SET (VOIDmode,
21871 gen_rtx_MEM (SImode,
21872 gen_rtx_PRE_DEC (Pmode,
21873 stack_pointer_rtx)),
21876 gen_rtx_SET (VOIDmode,
21877 gen_rtx_MEM (SImode,
21878 gen_rtx_PRE_DEC (Pmode,
21879 stack_pointer_rtx)),
21884 /* Store HImodes as SImodes. */
21885 operand = gen_lowpart (SImode, operand);
21889 gen_rtx_SET (VOIDmode,
21890 gen_rtx_MEM (GET_MODE (operand),
21891 gen_rtx_PRE_DEC (SImode,
21892 stack_pointer_rtx)),
21896 gcc_unreachable ();
21898 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21903 /* Free operand from the memory. */
21905 ix86_free_from_memory (enum machine_mode mode)
21907 if (!TARGET_RED_ZONE)
21911 if (mode == DImode || TARGET_64BIT)
21915 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21916 to pop or add instruction if registers are available. */
21917 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21918 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21923 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21924 QImode must go into class Q_REGS.
21925 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21926 movdf to do mem-to-mem moves through integer regs. */
21928 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21930 enum machine_mode mode = GET_MODE (x);
21932 /* We're only allowed to return a subclass of CLASS. Many of the
21933 following checks fail for NO_REGS, so eliminate that early. */
21934 if (regclass == NO_REGS)
21937 /* All classes can load zeros. */
21938 if (x == CONST0_RTX (mode))
21941 /* Force constants into memory if we are loading a (nonzero) constant into
21942 an MMX or SSE register. This is because there are no MMX/SSE instructions
21943 to load from a constant. */
21945 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21948 /* Prefer SSE regs only, if we can use them for math. */
21949 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21950 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21952 /* Floating-point constants need more complex checks. */
21953 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21955 /* General regs can load everything. */
21956 if (reg_class_subset_p (regclass, GENERAL_REGS))
21959 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21960 zero above. We only want to wind up preferring 80387 registers if
21961 we plan on doing computation with them. */
21963 && standard_80387_constant_p (x))
21965 /* Limit class to non-sse. */
21966 if (regclass == FLOAT_SSE_REGS)
21968 if (regclass == FP_TOP_SSE_REGS)
21970 if (regclass == FP_SECOND_SSE_REGS)
21971 return FP_SECOND_REG;
21972 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21979 /* Generally when we see PLUS here, it's the function invariant
21980 (plus soft-fp const_int). Which can only be computed into general
21982 if (GET_CODE (x) == PLUS)
21983 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21985 /* QImode constants are easy to load, but non-constant QImode data
21986 must go into Q_REGS. */
21987 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21989 if (reg_class_subset_p (regclass, Q_REGS))
21991 if (reg_class_subset_p (Q_REGS, regclass))
21999 /* Discourage putting floating-point values in SSE registers unless
22000 SSE math is being used, and likewise for the 387 registers. */
22002 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22004 enum machine_mode mode = GET_MODE (x);
22006 /* Restrict the output reload class to the register bank that we are doing
22007 math on. If we would like not to return a subset of CLASS, reject this
22008 alternative: if reload cannot do this, it will still use its choice. */
22009 mode = GET_MODE (x);
22010 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22011 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22013 if (X87_FLOAT_MODE_P (mode))
22015 if (regclass == FP_TOP_SSE_REGS)
22017 else if (regclass == FP_SECOND_SSE_REGS)
22018 return FP_SECOND_REG;
22020 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
22026 static enum reg_class
22027 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22028 enum machine_mode mode,
22029 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22031 /* QImode spills from non-QI registers require
22032 intermediate register on 32bit targets. */
22033 if (!in_p && mode == QImode && !TARGET_64BIT
22034 && (class == GENERAL_REGS
22035 || class == LEGACY_REGS
22036 || class == INDEX_REGS))
22045 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22046 regno = true_regnum (x);
22048 /* Return Q_REGS if the operand is in memory. */
22056 /* If we are copying between general and FP registers, we need a memory
22057 location. The same is true for SSE and MMX registers.
22059 To optimize register_move_cost performance, allow inline variant.
22061 The macro can't work reliably when one of the CLASSES is class containing
22062 registers from multiple units (SSE, MMX, integer). We avoid this by never
22063 combining those units in single alternative in the machine description.
22064 Ensure that this constraint holds to avoid unexpected surprises.
22066 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22067 enforce these sanity checks. */
22070 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22071 enum machine_mode mode, int strict)
22073 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22074 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22075 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22076 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22077 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22078 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22080 gcc_assert (!strict);
22084 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22087 /* ??? This is a lie. We do have moves between mmx/general, and for
22088 mmx/sse2. But by saying we need secondary memory we discourage the
22089 register allocator from using the mmx registers unless needed. */
22090 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22093 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22095 /* SSE1 doesn't have any direct moves from other classes. */
22099 /* If the target says that inter-unit moves are more expensive
22100 than moving through memory, then don't generate them. */
22101 if (!TARGET_INTER_UNIT_MOVES)
22104 /* Between SSE and general, we have moves no larger than word size. */
22105 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22113 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22114 enum machine_mode mode, int strict)
22116 return inline_secondary_memory_needed (class1, class2, mode, strict);
22119 /* Return true if the registers in CLASS cannot represent the change from
22120 modes FROM to TO. */
22123 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22124 enum reg_class regclass)
22129 /* x87 registers can't do subreg at all, as all values are reformatted
22130 to extended precision. */
22131 if (MAYBE_FLOAT_CLASS_P (regclass))
22134 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22136 /* Vector registers do not support QI or HImode loads. If we don't
22137 disallow a change to these modes, reload will assume it's ok to
22138 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22139 the vec_dupv4hi pattern. */
22140 if (GET_MODE_SIZE (from) < 4)
22143 /* Vector registers do not support subreg with nonzero offsets, which
22144 are otherwise valid for integer registers. Since we can't see
22145 whether we have a nonzero offset from here, prohibit all
22146 nonparadoxical subregs changing size. */
22147 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22154 /* Return the cost of moving data of mode M between a
22155 register and memory. A value of 2 is the default; this cost is
22156 relative to those in `REGISTER_MOVE_COST'.
22158 This function is used extensively by register_move_cost that is used to
22159 build tables at startup. Make it inline in this case.
22160 When IN is 2, return maximum of in and out move cost.
22162 If moving between registers and memory is more expensive than
22163 between two registers, you should define this macro to express the
22166 Model also increased moving costs of QImode registers in non
22170 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22174 if (FLOAT_CLASS_P (regclass))
22192 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22193 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22195 if (SSE_CLASS_P (regclass))
22198 switch (GET_MODE_SIZE (mode))
22213 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22214 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22216 if (MMX_CLASS_P (regclass))
22219 switch (GET_MODE_SIZE (mode))
22231 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22232 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22234 switch (GET_MODE_SIZE (mode))
22237 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22240 return ix86_cost->int_store[0];
22241 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22242 cost = ix86_cost->movzbl_load;
22244 cost = ix86_cost->int_load[0];
22246 return MAX (cost, ix86_cost->int_store[0]);
22252 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22254 return ix86_cost->movzbl_load;
22256 return ix86_cost->int_store[0] + 4;
22261 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22262 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22264 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22265 if (mode == TFmode)
22268 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22270 cost = ix86_cost->int_load[2];
22272 cost = ix86_cost->int_store[2];
22273 return (cost * (((int) GET_MODE_SIZE (mode)
22274 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22279 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22281 return inline_memory_move_cost (mode, regclass, in);
22285 /* Return the cost of moving data from a register in class CLASS1 to
22286 one in class CLASS2.
22288 It is not required that the cost always equal 2 when FROM is the same as TO;
22289 on some machines it is expensive to move between registers if they are not
22290 general registers. */
22293 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22294 enum reg_class class2)
22296 /* In case we require secondary memory, compute cost of the store followed
22297 by load. In order to avoid bad register allocation choices, we need
22298 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22300 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22304 cost += inline_memory_move_cost (mode, class1, 2);
22305 cost += inline_memory_move_cost (mode, class2, 2);
22307 /* In case of copying from general_purpose_register we may emit multiple
22308 stores followed by single load causing memory size mismatch stall.
22309 Count this as arbitrarily high cost of 20. */
22310 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22313 /* In the case of FP/MMX moves, the registers actually overlap, and we
22314 have to switch modes in order to treat them differently. */
22315 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22316 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22322 /* Moves between SSE/MMX and integer unit are expensive. */
22323 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22324 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22326 /* ??? By keeping returned value relatively high, we limit the number
22327 of moves between integer and MMX/SSE registers for all targets.
22328 Additionally, high value prevents problem with x86_modes_tieable_p(),
22329 where integer modes in MMX/SSE registers are not tieable
22330 because of missing QImode and HImode moves to, from or between
22331 MMX/SSE registers. */
22332 return MAX (8, ix86_cost->mmxsse_to_integer);
22334 if (MAYBE_FLOAT_CLASS_P (class1))
22335 return ix86_cost->fp_move;
22336 if (MAYBE_SSE_CLASS_P (class1))
22337 return ix86_cost->sse_move;
22338 if (MAYBE_MMX_CLASS_P (class1))
22339 return ix86_cost->mmx_move;
22343 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22346 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22348 /* Flags and only flags can only hold CCmode values. */
22349 if (CC_REGNO_P (regno))
22350 return GET_MODE_CLASS (mode) == MODE_CC;
22351 if (GET_MODE_CLASS (mode) == MODE_CC
22352 || GET_MODE_CLASS (mode) == MODE_RANDOM
22353 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22355 if (FP_REGNO_P (regno))
22356 return VALID_FP_MODE_P (mode);
22357 if (SSE_REGNO_P (regno))
22359 /* We implement the move patterns for all vector modes into and
22360 out of SSE registers, even when no operation instructions
22362 return (VALID_SSE_REG_MODE (mode)
22363 || VALID_SSE2_REG_MODE (mode)
22364 || VALID_MMX_REG_MODE (mode)
22365 || VALID_MMX_REG_MODE_3DNOW (mode));
22367 if (MMX_REGNO_P (regno))
22369 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22370 so if the register is available at all, then we can move data of
22371 the given mode into or out of it. */
22372 return (VALID_MMX_REG_MODE (mode)
22373 || VALID_MMX_REG_MODE_3DNOW (mode));
22376 if (mode == QImode)
22378 /* Take care for QImode values - they can be in non-QI regs,
22379 but then they do cause partial register stalls. */
22380 if (regno < 4 || TARGET_64BIT)
22382 if (!TARGET_PARTIAL_REG_STALL)
22384 return reload_in_progress || reload_completed;
22386 /* We handle both integer and floats in the general purpose registers. */
22387 else if (VALID_INT_MODE_P (mode))
22389 else if (VALID_FP_MODE_P (mode))
22391 else if (VALID_DFP_MODE_P (mode))
22393 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22394 on to use that value in smaller contexts, this can easily force a
22395 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22396 supporting DImode, allow it. */
22397 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22403 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22404 tieable integer mode. */
22407 ix86_tieable_integer_mode_p (enum machine_mode mode)
22416 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22419 return TARGET_64BIT;
22426 /* Return true if MODE1 is accessible in a register that can hold MODE2
22427 without copying. That is, all register classes that can hold MODE2
22428 can also hold MODE1. */
22431 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22433 if (mode1 == mode2)
22436 if (ix86_tieable_integer_mode_p (mode1)
22437 && ix86_tieable_integer_mode_p (mode2))
22440 /* MODE2 being XFmode implies fp stack or general regs, which means we
22441 can tie any smaller floating point modes to it. Note that we do not
22442 tie this with TFmode. */
22443 if (mode2 == XFmode)
22444 return mode1 == SFmode || mode1 == DFmode;
22446 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22447 that we can tie it with SFmode. */
22448 if (mode2 == DFmode)
22449 return mode1 == SFmode;
22451 /* If MODE2 is only appropriate for an SSE register, then tie with
22452 any other mode acceptable to SSE registers. */
22453 if (GET_MODE_SIZE (mode2) == 16
22454 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22455 return (GET_MODE_SIZE (mode1) == 16
22456 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22458 /* If MODE2 is appropriate for an MMX register, then tie
22459 with any other mode acceptable to MMX registers. */
22460 if (GET_MODE_SIZE (mode2) == 8
22461 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22462 return (GET_MODE_SIZE (mode1) == 8
22463 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22468 /* Compute a (partial) cost for rtx X. Return true if the complete
22469 cost has been computed, and false if subexpressions should be
22470 scanned. In either case, *TOTAL contains the cost result. */
22473 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22475 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22476 enum machine_mode mode = GET_MODE (x);
22484 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22486 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22488 else if (flag_pic && SYMBOLIC_CONST (x)
22490 || (!GET_CODE (x) != LABEL_REF
22491 && (GET_CODE (x) != SYMBOL_REF
22492 || !SYMBOL_REF_LOCAL_P (x)))))
22499 if (mode == VOIDmode)
22502 switch (standard_80387_constant_p (x))
22507 default: /* Other constants */
22512 /* Start with (MEM (SYMBOL_REF)), since that's where
22513 it'll probably end up. Add a penalty for size. */
22514 *total = (COSTS_N_INSNS (1)
22515 + (flag_pic != 0 && !TARGET_64BIT)
22516 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22522 /* The zero extensions is often completely free on x86_64, so make
22523 it as cheap as possible. */
22524 if (TARGET_64BIT && mode == DImode
22525 && GET_MODE (XEXP (x, 0)) == SImode)
22527 else if (TARGET_ZERO_EXTEND_WITH_AND)
22528 *total = ix86_cost->add;
22530 *total = ix86_cost->movzx;
22534 *total = ix86_cost->movsx;
22538 if (CONST_INT_P (XEXP (x, 1))
22539 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22541 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22544 *total = ix86_cost->add;
22547 if ((value == 2 || value == 3)
22548 && ix86_cost->lea <= ix86_cost->shift_const)
22550 *total = ix86_cost->lea;
22560 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22562 if (CONST_INT_P (XEXP (x, 1)))
22564 if (INTVAL (XEXP (x, 1)) > 32)
22565 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22567 *total = ix86_cost->shift_const * 2;
22571 if (GET_CODE (XEXP (x, 1)) == AND)
22572 *total = ix86_cost->shift_var * 2;
22574 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22579 if (CONST_INT_P (XEXP (x, 1)))
22580 *total = ix86_cost->shift_const;
22582 *total = ix86_cost->shift_var;
22587 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22589 /* ??? SSE scalar cost should be used here. */
22590 *total = ix86_cost->fmul;
22593 else if (X87_FLOAT_MODE_P (mode))
22595 *total = ix86_cost->fmul;
22598 else if (FLOAT_MODE_P (mode))
22600 /* ??? SSE vector cost should be used here. */
22601 *total = ix86_cost->fmul;
22606 rtx op0 = XEXP (x, 0);
22607 rtx op1 = XEXP (x, 1);
22609 if (CONST_INT_P (XEXP (x, 1)))
22611 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22612 for (nbits = 0; value != 0; value &= value - 1)
22616 /* This is arbitrary. */
22619 /* Compute costs correctly for widening multiplication. */
22620 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22621 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22622 == GET_MODE_SIZE (mode))
22624 int is_mulwiden = 0;
22625 enum machine_mode inner_mode = GET_MODE (op0);
22627 if (GET_CODE (op0) == GET_CODE (op1))
22628 is_mulwiden = 1, op1 = XEXP (op1, 0);
22629 else if (CONST_INT_P (op1))
22631 if (GET_CODE (op0) == SIGN_EXTEND)
22632 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22635 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22639 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22642 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22643 + nbits * ix86_cost->mult_bit
22644 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22653 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22654 /* ??? SSE cost should be used here. */
22655 *total = ix86_cost->fdiv;
22656 else if (X87_FLOAT_MODE_P (mode))
22657 *total = ix86_cost->fdiv;
22658 else if (FLOAT_MODE_P (mode))
22659 /* ??? SSE vector cost should be used here. */
22660 *total = ix86_cost->fdiv;
22662 *total = ix86_cost->divide[MODE_INDEX (mode)];
22666 if (GET_MODE_CLASS (mode) == MODE_INT
22667 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22669 if (GET_CODE (XEXP (x, 0)) == PLUS
22670 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22671 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22672 && CONSTANT_P (XEXP (x, 1)))
22674 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22675 if (val == 2 || val == 4 || val == 8)
22677 *total = ix86_cost->lea;
22678 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22679 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22681 *total += rtx_cost (XEXP (x, 1), outer_code);
22685 else if (GET_CODE (XEXP (x, 0)) == MULT
22686 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22688 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22689 if (val == 2 || val == 4 || val == 8)
22691 *total = ix86_cost->lea;
22692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22693 *total += rtx_cost (XEXP (x, 1), outer_code);
22697 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22699 *total = ix86_cost->lea;
22700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22701 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22702 *total += rtx_cost (XEXP (x, 1), outer_code);
22709 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22711 /* ??? SSE cost should be used here. */
22712 *total = ix86_cost->fadd;
22715 else if (X87_FLOAT_MODE_P (mode))
22717 *total = ix86_cost->fadd;
22720 else if (FLOAT_MODE_P (mode))
22722 /* ??? SSE vector cost should be used here. */
22723 *total = ix86_cost->fadd;
22731 if (!TARGET_64BIT && mode == DImode)
22733 *total = (ix86_cost->add * 2
22734 + (rtx_cost (XEXP (x, 0), outer_code)
22735 << (GET_MODE (XEXP (x, 0)) != DImode))
22736 + (rtx_cost (XEXP (x, 1), outer_code)
22737 << (GET_MODE (XEXP (x, 1)) != DImode)));
22743 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22745 /* ??? SSE cost should be used here. */
22746 *total = ix86_cost->fchs;
22749 else if (X87_FLOAT_MODE_P (mode))
22751 *total = ix86_cost->fchs;
22754 else if (FLOAT_MODE_P (mode))
22756 /* ??? SSE vector cost should be used here. */
22757 *total = ix86_cost->fchs;
22763 if (!TARGET_64BIT && mode == DImode)
22764 *total = ix86_cost->add * 2;
22766 *total = ix86_cost->add;
22770 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22771 && XEXP (XEXP (x, 0), 1) == const1_rtx
22772 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22773 && XEXP (x, 1) == const0_rtx)
22775 /* This kind of construct is implemented using test[bwl].
22776 Treat it as if we had an AND. */
22777 *total = (ix86_cost->add
22778 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22779 + rtx_cost (const1_rtx, outer_code));
22785 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22790 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22791 /* ??? SSE cost should be used here. */
22792 *total = ix86_cost->fabs;
22793 else if (X87_FLOAT_MODE_P (mode))
22794 *total = ix86_cost->fabs;
22795 else if (FLOAT_MODE_P (mode))
22796 /* ??? SSE vector cost should be used here. */
22797 *total = ix86_cost->fabs;
22801 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22802 /* ??? SSE cost should be used here. */
22803 *total = ix86_cost->fsqrt;
22804 else if (X87_FLOAT_MODE_P (mode))
22805 *total = ix86_cost->fsqrt;
22806 else if (FLOAT_MODE_P (mode))
22807 /* ??? SSE vector cost should be used here. */
22808 *total = ix86_cost->fsqrt;
22812 if (XINT (x, 1) == UNSPEC_TP)
22823 static int current_machopic_label_num;
22825 /* Given a symbol name and its associated stub, write out the
22826 definition of the stub. */
22829 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22831 unsigned int length;
22832 char *binder_name, *symbol_name, lazy_ptr_name[32];
22833 int label = ++current_machopic_label_num;
22835 /* For 64-bit we shouldn't get here. */
22836 gcc_assert (!TARGET_64BIT);
22838 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22839 symb = (*targetm.strip_name_encoding) (symb);
22841 length = strlen (stub);
22842 binder_name = alloca (length + 32);
22843 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22845 length = strlen (symb);
22846 symbol_name = alloca (length + 32);
22847 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22849 sprintf (lazy_ptr_name, "L%d$lz", label);
22852 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22854 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22856 fprintf (file, "%s:\n", stub);
22857 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22861 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22862 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22863 fprintf (file, "\tjmp\t*%%edx\n");
22866 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22868 fprintf (file, "%s:\n", binder_name);
22872 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22873 fprintf (file, "\tpushl\t%%eax\n");
22876 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22878 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22880 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22881 fprintf (file, "%s:\n", lazy_ptr_name);
22882 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22883 fprintf (file, "\t.long %s\n", binder_name);
22887 darwin_x86_file_end (void)
22889 darwin_file_end ();
22892 #endif /* TARGET_MACHO */
22894 /* Order the registers for register allocator. */
22897 x86_order_regs_for_local_alloc (void)
22902 /* First allocate the local general purpose registers. */
22903 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22904 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22905 reg_alloc_order [pos++] = i;
22907 /* Global general purpose registers. */
22908 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22909 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22910 reg_alloc_order [pos++] = i;
22912 /* x87 registers come first in case we are doing FP math
22914 if (!TARGET_SSE_MATH)
22915 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22916 reg_alloc_order [pos++] = i;
22918 /* SSE registers. */
22919 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22920 reg_alloc_order [pos++] = i;
22921 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22922 reg_alloc_order [pos++] = i;
22924 /* x87 registers. */
22925 if (TARGET_SSE_MATH)
22926 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22927 reg_alloc_order [pos++] = i;
22929 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22930 reg_alloc_order [pos++] = i;
22932 /* Initialize the rest of array as we do not allocate some registers
22934 while (pos < FIRST_PSEUDO_REGISTER)
22935 reg_alloc_order [pos++] = 0;
22938 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22939 struct attribute_spec.handler. */
22941 ix86_handle_struct_attribute (tree *node, tree name,
22942 tree args ATTRIBUTE_UNUSED,
22943 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22946 if (DECL_P (*node))
22948 if (TREE_CODE (*node) == TYPE_DECL)
22949 type = &TREE_TYPE (*node);
22954 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22955 || TREE_CODE (*type) == UNION_TYPE)))
22957 warning (OPT_Wattributes, "%qs attribute ignored",
22958 IDENTIFIER_POINTER (name));
22959 *no_add_attrs = true;
22962 else if ((is_attribute_p ("ms_struct", name)
22963 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22964 || ((is_attribute_p ("gcc_struct", name)
22965 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22967 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22968 IDENTIFIER_POINTER (name));
22969 *no_add_attrs = true;
22976 ix86_ms_bitfield_layout_p (const_tree record_type)
22978 return (TARGET_MS_BITFIELD_LAYOUT &&
22979 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22980 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22983 /* Returns an expression indicating where the this parameter is
22984 located on entry to the FUNCTION. */
22987 x86_this_parameter (tree function)
22989 tree type = TREE_TYPE (function);
22990 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22995 const int *parm_regs;
22997 if (TARGET_64BIT_MS_ABI)
22998 parm_regs = x86_64_ms_abi_int_parameter_registers;
23000 parm_regs = x86_64_int_parameter_registers;
23001 return gen_rtx_REG (DImode, parm_regs[aggr]);
23004 nregs = ix86_function_regparm (type, function);
23006 if (nregs > 0 && !stdarg_p (type))
23010 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23011 regno = aggr ? DX_REG : CX_REG;
23019 return gen_rtx_MEM (SImode,
23020 plus_constant (stack_pointer_rtx, 4));
23023 return gen_rtx_REG (SImode, regno);
23026 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23029 /* Determine whether x86_output_mi_thunk can succeed. */
23032 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23033 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23034 HOST_WIDE_INT vcall_offset, const_tree function)
23036 /* 64-bit can handle anything. */
23040 /* For 32-bit, everything's fine if we have one free register. */
23041 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23044 /* Need a free register for vcall_offset. */
23048 /* Need a free register for GOT references. */
23049 if (flag_pic && !(*targetm.binds_local_p) (function))
23052 /* Otherwise ok. */
23056 /* Output the assembler code for a thunk function. THUNK_DECL is the
23057 declaration for the thunk function itself, FUNCTION is the decl for
23058 the target function. DELTA is an immediate constant offset to be
23059 added to THIS. If VCALL_OFFSET is nonzero, the word at
23060 *(*this + vcall_offset) should be added to THIS. */
23063 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23064 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23065 HOST_WIDE_INT vcall_offset, tree function)
23068 rtx this_param = x86_this_parameter (function);
23071 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23072 pull it in now and let DELTA benefit. */
23073 if (REG_P (this_param))
23074 this_reg = this_param;
23075 else if (vcall_offset)
23077 /* Put the this parameter into %eax. */
23078 xops[0] = this_param;
23079 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23081 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23083 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23086 this_reg = NULL_RTX;
23088 /* Adjust the this parameter by a fixed constant. */
23091 xops[0] = GEN_INT (delta);
23092 xops[1] = this_reg ? this_reg : this_param;
23095 if (!x86_64_general_operand (xops[0], DImode))
23097 tmp = gen_rtx_REG (DImode, R10_REG);
23099 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23101 xops[1] = this_param;
23103 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23106 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23109 /* Adjust the this parameter by a value stored in the vtable. */
23113 tmp = gen_rtx_REG (DImode, R10_REG);
23116 int tmp_regno = CX_REG;
23117 if (lookup_attribute ("fastcall",
23118 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23119 tmp_regno = AX_REG;
23120 tmp = gen_rtx_REG (SImode, tmp_regno);
23123 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23126 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23128 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23130 /* Adjust the this parameter. */
23131 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23132 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23134 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23135 xops[0] = GEN_INT (vcall_offset);
23137 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23138 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23140 xops[1] = this_reg;
23142 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23144 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23147 /* If necessary, drop THIS back to its stack slot. */
23148 if (this_reg && this_reg != this_param)
23150 xops[0] = this_reg;
23151 xops[1] = this_param;
23153 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23155 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23158 xops[0] = XEXP (DECL_RTL (function), 0);
23161 if (!flag_pic || (*targetm.binds_local_p) (function))
23162 output_asm_insn ("jmp\t%P0", xops);
23163 /* All thunks should be in the same object as their target,
23164 and thus binds_local_p should be true. */
23165 else if (TARGET_64BIT_MS_ABI)
23166 gcc_unreachable ();
23169 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23170 tmp = gen_rtx_CONST (Pmode, tmp);
23171 tmp = gen_rtx_MEM (QImode, tmp);
23173 output_asm_insn ("jmp\t%A0", xops);
23178 if (!flag_pic || (*targetm.binds_local_p) (function))
23179 output_asm_insn ("jmp\t%P0", xops);
23184 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23185 tmp = (gen_rtx_SYMBOL_REF
23187 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23188 tmp = gen_rtx_MEM (QImode, tmp);
23190 output_asm_insn ("jmp\t%0", xops);
23193 #endif /* TARGET_MACHO */
23195 tmp = gen_rtx_REG (SImode, CX_REG);
23196 output_set_got (tmp, NULL_RTX);
23199 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23200 output_asm_insn ("jmp\t{*}%1", xops);
23206 x86_file_start (void)
23208 default_file_start ();
23210 darwin_file_start ();
23212 if (X86_FILE_START_VERSION_DIRECTIVE)
23213 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23214 if (X86_FILE_START_FLTUSED)
23215 fputs ("\t.global\t__fltused\n", asm_out_file);
23216 if (ix86_asm_dialect == ASM_INTEL)
23217 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23221 x86_field_alignment (tree field, int computed)
23223 enum machine_mode mode;
23224 tree type = TREE_TYPE (field);
23226 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23228 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23229 ? get_inner_array_type (type) : type);
23230 if (mode == DFmode || mode == DCmode
23231 || GET_MODE_CLASS (mode) == MODE_INT
23232 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23233 return MIN (32, computed);
23237 /* Output assembler code to FILE to increment profiler label # LABELNO
23238 for profiling a function entry. */
23240 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23244 #ifndef NO_PROFILE_COUNTERS
23245 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23248 if (!TARGET_64BIT_MS_ABI && flag_pic)
23249 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23251 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23255 #ifndef NO_PROFILE_COUNTERS
23256 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23257 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23259 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23263 #ifndef NO_PROFILE_COUNTERS
23264 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23265 PROFILE_COUNT_REGISTER);
23267 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23271 /* We don't have exact information about the insn sizes, but we may assume
23272 quite safely that we are informed about all 1 byte insns and memory
23273 address sizes. This is enough to eliminate unnecessary padding in
23277 min_insn_size (rtx insn)
23281 if (!INSN_P (insn) || !active_insn_p (insn))
23284 /* Discard alignments we've emit and jump instructions. */
23285 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23286 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23289 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23290 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23293 /* Important case - calls are always 5 bytes.
23294 It is common to have many calls in the row. */
23296 && symbolic_reference_mentioned_p (PATTERN (insn))
23297 && !SIBLING_CALL_P (insn))
23299 if (get_attr_length (insn) <= 1)
23302 /* For normal instructions we may rely on the sizes of addresses
23303 and the presence of symbol to require 4 bytes of encoding.
23304 This is not the case for jumps where references are PC relative. */
23305 if (!JUMP_P (insn))
23307 l = get_attr_length_address (insn);
23308 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23317 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23321 ix86_avoid_jump_misspredicts (void)
23323 rtx insn, start = get_insns ();
23324 int nbytes = 0, njumps = 0;
23327 /* Look for all minimal intervals of instructions containing 4 jumps.
23328 The intervals are bounded by START and INSN. NBYTES is the total
23329 size of instructions in the interval including INSN and not including
23330 START. When the NBYTES is smaller than 16 bytes, it is possible
23331 that the end of START and INSN ends up in the same 16byte page.
23333 The smallest offset in the page INSN can start is the case where START
23334 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23335 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23337 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23340 nbytes += min_insn_size (insn);
23342 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23343 INSN_UID (insn), min_insn_size (insn));
23345 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23346 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23354 start = NEXT_INSN (start);
23355 if ((JUMP_P (start)
23356 && GET_CODE (PATTERN (start)) != ADDR_VEC
23357 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23359 njumps--, isjump = 1;
23362 nbytes -= min_insn_size (start);
23364 gcc_assert (njumps >= 0);
23366 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23367 INSN_UID (start), INSN_UID (insn), nbytes);
23369 if (njumps == 3 && isjump && nbytes < 16)
23371 int padsize = 15 - nbytes + min_insn_size (insn);
23374 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23375 INSN_UID (insn), padsize);
23376 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23381 /* AMD Athlon works faster
23382 when RET is not destination of conditional jump or directly preceded
23383 by other jump instruction. We avoid the penalty by inserting NOP just
23384 before the RET instructions in such cases. */
23386 ix86_pad_returns (void)
23391 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23393 basic_block bb = e->src;
23394 rtx ret = BB_END (bb);
23396 bool replace = false;
23398 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23399 || !maybe_hot_bb_p (bb))
23401 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23402 if (active_insn_p (prev) || LABEL_P (prev))
23404 if (prev && LABEL_P (prev))
23409 FOR_EACH_EDGE (e, ei, bb->preds)
23410 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23411 && !(e->flags & EDGE_FALLTHRU))
23416 prev = prev_active_insn (ret);
23418 && ((JUMP_P (prev) && any_condjump_p (prev))
23421 /* Empty functions get branch mispredict even when the jump destination
23422 is not visible to us. */
23423 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23428 emit_insn_before (gen_return_internal_long (), ret);
23434 /* Implement machine specific optimizations. We implement padding of returns
23435 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23439 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23440 ix86_pad_returns ();
23441 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23442 ix86_avoid_jump_misspredicts ();
23445 /* Return nonzero when QImode register that must be represented via REX prefix
23448 x86_extended_QIreg_mentioned_p (rtx insn)
23451 extract_insn_cached (insn);
23452 for (i = 0; i < recog_data.n_operands; i++)
23453 if (REG_P (recog_data.operand[i])
23454 && REGNO (recog_data.operand[i]) >= 4)
23459 /* Return nonzero when P points to register encoded via REX prefix.
23460 Called via for_each_rtx. */
23462 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23464 unsigned int regno;
23467 regno = REGNO (*p);
23468 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23471 /* Return true when INSN mentions register that must be encoded using REX
23474 x86_extended_reg_mentioned_p (rtx insn)
23476 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23479 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23480 optabs would emit if we didn't have TFmode patterns. */
23483 x86_emit_floatuns (rtx operands[2])
23485 rtx neglab, donelab, i0, i1, f0, in, out;
23486 enum machine_mode mode, inmode;
23488 inmode = GET_MODE (operands[1]);
23489 gcc_assert (inmode == SImode || inmode == DImode);
23492 in = force_reg (inmode, operands[1]);
23493 mode = GET_MODE (out);
23494 neglab = gen_label_rtx ();
23495 donelab = gen_label_rtx ();
23496 f0 = gen_reg_rtx (mode);
23498 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23500 expand_float (out, in, 0);
23502 emit_jump_insn (gen_jump (donelab));
23505 emit_label (neglab);
23507 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23509 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23511 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23513 expand_float (f0, i0, 0);
23515 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23517 emit_label (donelab);
23520 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23521 with all elements equal to VAR. Return true if successful. */
23524 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23525 rtx target, rtx val)
23527 enum machine_mode smode, wsmode, wvmode;
23542 val = force_reg (GET_MODE_INNER (mode), val);
23543 x = gen_rtx_VEC_DUPLICATE (mode, val);
23544 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23550 if (TARGET_SSE || TARGET_3DNOW_A)
23552 val = gen_lowpart (SImode, val);
23553 x = gen_rtx_TRUNCATE (HImode, val);
23554 x = gen_rtx_VEC_DUPLICATE (mode, x);
23555 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23577 /* Extend HImode to SImode using a paradoxical SUBREG. */
23578 tmp1 = gen_reg_rtx (SImode);
23579 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23580 /* Insert the SImode value as low element of V4SImode vector. */
23581 tmp2 = gen_reg_rtx (V4SImode);
23582 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23583 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23584 CONST0_RTX (V4SImode),
23586 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23587 /* Cast the V4SImode vector back to a V8HImode vector. */
23588 tmp1 = gen_reg_rtx (V8HImode);
23589 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23590 /* Duplicate the low short through the whole low SImode word. */
23591 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23592 /* Cast the V8HImode vector back to a V4SImode vector. */
23593 tmp2 = gen_reg_rtx (V4SImode);
23594 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23595 /* Replicate the low element of the V4SImode vector. */
23596 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23597 /* Cast the V2SImode back to V8HImode, and store in target. */
23598 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23609 /* Extend QImode to SImode using a paradoxical SUBREG. */
23610 tmp1 = gen_reg_rtx (SImode);
23611 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23612 /* Insert the SImode value as low element of V4SImode vector. */
23613 tmp2 = gen_reg_rtx (V4SImode);
23614 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23615 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23616 CONST0_RTX (V4SImode),
23618 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23619 /* Cast the V4SImode vector back to a V16QImode vector. */
23620 tmp1 = gen_reg_rtx (V16QImode);
23621 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23622 /* Duplicate the low byte through the whole low SImode word. */
23623 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23624 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23625 /* Cast the V16QImode vector back to a V4SImode vector. */
23626 tmp2 = gen_reg_rtx (V4SImode);
23627 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23628 /* Replicate the low element of the V4SImode vector. */
23629 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23630 /* Cast the V2SImode back to V16QImode, and store in target. */
23631 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23639 /* Replicate the value once into the next wider mode and recurse. */
23640 val = convert_modes (wsmode, smode, val, true);
23641 x = expand_simple_binop (wsmode, ASHIFT, val,
23642 GEN_INT (GET_MODE_BITSIZE (smode)),
23643 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23644 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23646 x = gen_reg_rtx (wvmode);
23647 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23648 gcc_unreachable ();
23649 emit_move_insn (target, gen_lowpart (mode, x));
23657 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23658 whose ONE_VAR element is VAR, and other elements are zero. Return true
23662 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23663 rtx target, rtx var, int one_var)
23665 enum machine_mode vsimode;
23668 bool use_vector_set = false;
23673 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23678 use_vector_set = TARGET_SSE4_1;
23681 use_vector_set = TARGET_SSE2;
23684 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23689 if (use_vector_set)
23691 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23692 var = force_reg (GET_MODE_INNER (mode), var);
23693 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23709 var = force_reg (GET_MODE_INNER (mode), var);
23710 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23711 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23716 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23717 new_target = gen_reg_rtx (mode);
23719 new_target = target;
23720 var = force_reg (GET_MODE_INNER (mode), var);
23721 x = gen_rtx_VEC_DUPLICATE (mode, var);
23722 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23723 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23726 /* We need to shuffle the value to the correct position, so
23727 create a new pseudo to store the intermediate result. */
23729 /* With SSE2, we can use the integer shuffle insns. */
23730 if (mode != V4SFmode && TARGET_SSE2)
23732 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23734 GEN_INT (one_var == 1 ? 0 : 1),
23735 GEN_INT (one_var == 2 ? 0 : 1),
23736 GEN_INT (one_var == 3 ? 0 : 1)));
23737 if (target != new_target)
23738 emit_move_insn (target, new_target);
23742 /* Otherwise convert the intermediate result to V4SFmode and
23743 use the SSE1 shuffle instructions. */
23744 if (mode != V4SFmode)
23746 tmp = gen_reg_rtx (V4SFmode);
23747 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23752 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23754 GEN_INT (one_var == 1 ? 0 : 1),
23755 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23756 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23758 if (mode != V4SFmode)
23759 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23760 else if (tmp != target)
23761 emit_move_insn (target, tmp);
23763 else if (target != new_target)
23764 emit_move_insn (target, new_target);
23769 vsimode = V4SImode;
23775 vsimode = V2SImode;
23781 /* Zero extend the variable element to SImode and recurse. */
23782 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23784 x = gen_reg_rtx (vsimode);
23785 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23787 gcc_unreachable ();
23789 emit_move_insn (target, gen_lowpart (mode, x));
23797 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23798 consisting of the values in VALS. It is known that all elements
23799 except ONE_VAR are constants. Return true if successful. */
23802 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23803 rtx target, rtx vals, int one_var)
23805 rtx var = XVECEXP (vals, 0, one_var);
23806 enum machine_mode wmode;
23809 const_vec = copy_rtx (vals);
23810 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23811 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23819 /* For the two element vectors, it's just as easy to use
23820 the general case. */
23836 /* There's no way to set one QImode entry easily. Combine
23837 the variable value with its adjacent constant value, and
23838 promote to an HImode set. */
23839 x = XVECEXP (vals, 0, one_var ^ 1);
23842 var = convert_modes (HImode, QImode, var, true);
23843 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23844 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23845 x = GEN_INT (INTVAL (x) & 0xff);
23849 var = convert_modes (HImode, QImode, var, true);
23850 x = gen_int_mode (INTVAL (x) << 8, HImode);
23852 if (x != const0_rtx)
23853 var = expand_simple_binop (HImode, IOR, var, x, var,
23854 1, OPTAB_LIB_WIDEN);
23856 x = gen_reg_rtx (wmode);
23857 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23858 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23860 emit_move_insn (target, gen_lowpart (mode, x));
23867 emit_move_insn (target, const_vec);
23868 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23872 /* A subroutine of ix86_expand_vector_init_general. Use vector
23873 concatenate to handle the most general case: all values variable,
23874 and none identical. */
23877 ix86_expand_vector_init_concat (enum machine_mode mode,
23878 rtx target, rtx *ops, int n)
23880 enum machine_mode cmode, hmode = VOIDmode;
23881 rtx first[4], second[2];
23909 gcc_unreachable ();
23912 if (!register_operand (ops[1], cmode))
23913 ops[1] = force_reg (cmode, ops[1]);
23914 if (!register_operand (ops[0], cmode))
23915 ops[0] = force_reg (cmode, ops[0]);
23916 emit_insn (gen_rtx_SET (VOIDmode, target,
23917 gen_rtx_VEC_CONCAT (mode, ops[0],
23931 gcc_unreachable ();
23936 /* FIXME: We process inputs backward to help RA. PR 36222. */
23939 for (; i > 0; i -= 2, j--)
23941 first[j] = gen_reg_rtx (cmode);
23942 v = gen_rtvec (2, ops[i - 1], ops[i]);
23943 ix86_expand_vector_init (false, first[j],
23944 gen_rtx_PARALLEL (cmode, v));
23950 gcc_assert (hmode != VOIDmode);
23951 for (i = j = 0; i < n; i += 2, j++)
23953 second[j] = gen_reg_rtx (hmode);
23954 ix86_expand_vector_init_concat (hmode, second [j],
23958 ix86_expand_vector_init_concat (mode, target, second, n);
23961 ix86_expand_vector_init_concat (mode, target, first, n);
23965 gcc_unreachable ();
23969 /* A subroutine of ix86_expand_vector_init_general. Use vector
23970 interleave to handle the most general case: all values variable,
23971 and none identical. */
23974 ix86_expand_vector_init_interleave (enum machine_mode mode,
23975 rtx target, rtx *ops, int n)
23977 enum machine_mode first_imode, second_imode, third_imode;
23980 rtx (*gen_load_even) (rtx, rtx, rtx);
23981 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
23982 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
23987 gen_load_even = gen_vec_setv8hi;
23988 gen_interleave_first_low = gen_vec_interleave_lowv4si;
23989 gen_interleave_second_low = gen_vec_interleave_lowv2di;
23990 first_imode = V4SImode;
23991 second_imode = V2DImode;
23992 third_imode = VOIDmode;
23995 gen_load_even = gen_vec_setv16qi;
23996 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
23997 gen_interleave_second_low = gen_vec_interleave_lowv4si;
23998 first_imode = V8HImode;
23999 second_imode = V4SImode;
24000 third_imode = V2DImode;
24003 gcc_unreachable ();
24006 for (i = 0; i < n; i++)
24008 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
24009 op0 = gen_reg_rtx (SImode);
24010 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24012 /* Insert the SImode value as low element of V4SImode vector. */
24013 op1 = gen_reg_rtx (V4SImode);
24014 op0 = gen_rtx_VEC_MERGE (V4SImode,
24015 gen_rtx_VEC_DUPLICATE (V4SImode,
24017 CONST0_RTX (V4SImode),
24019 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24021 /* Cast the V4SImode vector back to a vector in orignal mode. */
24022 op0 = gen_reg_rtx (mode);
24023 emit_move_insn (op0, gen_lowpart (mode, op1));
24025 /* Load even elements into the second positon. */
24026 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24029 /* Cast vector to FIRST_IMODE vector. */
24030 ops[i] = gen_reg_rtx (first_imode);
24031 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24034 /* Interleave low FIRST_IMODE vectors. */
24035 for (i = j = 0; i < n; i += 2, j++)
24037 op0 = gen_reg_rtx (first_imode);
24038 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24040 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24041 ops[j] = gen_reg_rtx (second_imode);
24042 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24045 /* Interleave low SECOND_IMODE vectors. */
24046 switch (second_imode)
24049 for (i = j = 0; i < n / 2; i += 2, j++)
24051 op0 = gen_reg_rtx (second_imode);
24052 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24055 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24057 ops[j] = gen_reg_rtx (third_imode);
24058 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24060 second_imode = V2DImode;
24061 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24065 op0 = gen_reg_rtx (second_imode);
24066 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24069 /* Cast the SECOND_IMODE vector back to a vector on original
24071 emit_insn (gen_rtx_SET (VOIDmode, target,
24072 gen_lowpart (mode, op0)));
24076 gcc_unreachable ();
24080 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
24081 all values variable, and none identical. */
24084 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24085 rtx target, rtx vals)
24094 if (!mmx_ok && !TARGET_SSE)
24111 for (i = 0; i < n; i++)
24112 ops[i] = XVECEXP (vals, 0, i);
24113 ix86_expand_vector_init_concat (mode, target, ops, n);
24117 if (!TARGET_SSE4_1)
24121 goto vec_interleave;
24128 goto vec_interleave;
24131 for (i = 0; i < n; i++)
24132 ops[i] = XVECEXP (vals, 0, i);
24133 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24141 gcc_unreachable ();
24145 int i, j, n_elts, n_words, n_elt_per_word;
24146 enum machine_mode inner_mode;
24147 rtx words[4], shift;
24149 inner_mode = GET_MODE_INNER (mode);
24150 n_elts = GET_MODE_NUNITS (mode);
24151 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24152 n_elt_per_word = n_elts / n_words;
24153 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24155 for (i = 0; i < n_words; ++i)
24157 rtx word = NULL_RTX;
24159 for (j = 0; j < n_elt_per_word; ++j)
24161 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24162 elt = convert_modes (word_mode, inner_mode, elt, true);
24168 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24169 word, 1, OPTAB_LIB_WIDEN);
24170 word = expand_simple_binop (word_mode, IOR, word, elt,
24171 word, 1, OPTAB_LIB_WIDEN);
24179 emit_move_insn (target, gen_lowpart (mode, words[0]));
24180 else if (n_words == 2)
24182 rtx tmp = gen_reg_rtx (mode);
24183 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
24184 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24185 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24186 emit_move_insn (target, tmp);
24188 else if (n_words == 4)
24190 rtx tmp = gen_reg_rtx (V4SImode);
24191 gcc_assert (word_mode == SImode);
24192 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24193 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24194 emit_move_insn (target, gen_lowpart (mode, tmp));
24197 gcc_unreachable ();
24201 /* Initialize vector TARGET via VALS. Suppress the use of MMX
24202 instructions unless MMX_OK is true. */
24205 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24207 enum machine_mode mode = GET_MODE (target);
24208 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24209 int n_elts = GET_MODE_NUNITS (mode);
24210 int n_var = 0, one_var = -1;
24211 bool all_same = true, all_const_zero = true;
24215 for (i = 0; i < n_elts; ++i)
24217 x = XVECEXP (vals, 0, i);
24218 if (!(CONST_INT_P (x)
24219 || GET_CODE (x) == CONST_DOUBLE
24220 || GET_CODE (x) == CONST_FIXED))
24221 n_var++, one_var = i;
24222 else if (x != CONST0_RTX (inner_mode))
24223 all_const_zero = false;
24224 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24228 /* Constants are best loaded from the constant pool. */
24231 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24235 /* If all values are identical, broadcast the value. */
24237 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24238 XVECEXP (vals, 0, 0)))
24241 /* Values where only one field is non-constant are best loaded from
24242 the pool and overwritten via move later. */
24246 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24247 XVECEXP (vals, 0, one_var),
24251 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24255 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
24259 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24261 enum machine_mode mode = GET_MODE (target);
24262 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24263 bool use_vec_merge = false;
24272 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24273 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24275 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24277 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24278 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24284 use_vec_merge = TARGET_SSE4_1;
24292 /* For the two element vectors, we implement a VEC_CONCAT with
24293 the extraction of the other element. */
24295 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24296 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24299 op0 = val, op1 = tmp;
24301 op0 = tmp, op1 = val;
24303 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24304 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24309 use_vec_merge = TARGET_SSE4_1;
24316 use_vec_merge = true;
24320 /* tmp = target = A B C D */
24321 tmp = copy_to_reg (target);
24322 /* target = A A B B */
24323 emit_insn (gen_sse_unpcklps (target, target, target));
24324 /* target = X A B B */
24325 ix86_expand_vector_set (false, target, val, 0);
24326 /* target = A X C D */
24327 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24328 GEN_INT (1), GEN_INT (0),
24329 GEN_INT (2+4), GEN_INT (3+4)));
24333 /* tmp = target = A B C D */
24334 tmp = copy_to_reg (target);
24335 /* tmp = X B C D */
24336 ix86_expand_vector_set (false, tmp, val, 0);
24337 /* target = A B X D */
24338 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24339 GEN_INT (0), GEN_INT (1),
24340 GEN_INT (0+4), GEN_INT (3+4)));
24344 /* tmp = target = A B C D */
24345 tmp = copy_to_reg (target);
24346 /* tmp = X B C D */
24347 ix86_expand_vector_set (false, tmp, val, 0);
24348 /* target = A B X D */
24349 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24350 GEN_INT (0), GEN_INT (1),
24351 GEN_INT (2+4), GEN_INT (0+4)));
24355 gcc_unreachable ();
24360 use_vec_merge = TARGET_SSE4_1;
24364 /* Element 0 handled by vec_merge below. */
24367 use_vec_merge = true;
24373 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24374 store into element 0, then shuffle them back. */
24378 order[0] = GEN_INT (elt);
24379 order[1] = const1_rtx;
24380 order[2] = const2_rtx;
24381 order[3] = GEN_INT (3);
24382 order[elt] = const0_rtx;
24384 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24385 order[1], order[2], order[3]));
24387 ix86_expand_vector_set (false, target, val, 0);
24389 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24390 order[1], order[2], order[3]));
24394 /* For SSE1, we have to reuse the V4SF code. */
24395 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24396 gen_lowpart (SFmode, val), elt);
24401 use_vec_merge = TARGET_SSE2;
24404 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24408 use_vec_merge = TARGET_SSE4_1;
24418 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24419 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24420 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24424 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24426 emit_move_insn (mem, target);
24428 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24429 emit_move_insn (tmp, val);
24431 emit_move_insn (target, mem);
24436 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24438 enum machine_mode mode = GET_MODE (vec);
24439 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24440 bool use_vec_extr = false;
24453 use_vec_extr = true;
24457 use_vec_extr = TARGET_SSE4_1;
24469 tmp = gen_reg_rtx (mode);
24470 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24471 GEN_INT (elt), GEN_INT (elt),
24472 GEN_INT (elt+4), GEN_INT (elt+4)));
24476 tmp = gen_reg_rtx (mode);
24477 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24481 gcc_unreachable ();
24484 use_vec_extr = true;
24489 use_vec_extr = TARGET_SSE4_1;
24503 tmp = gen_reg_rtx (mode);
24504 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24505 GEN_INT (elt), GEN_INT (elt),
24506 GEN_INT (elt), GEN_INT (elt)));
24510 tmp = gen_reg_rtx (mode);
24511 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24515 gcc_unreachable ();
24518 use_vec_extr = true;
24523 /* For SSE1, we have to reuse the V4SF code. */
24524 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24525 gen_lowpart (V4SFmode, vec), elt);
24531 use_vec_extr = TARGET_SSE2;
24534 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24538 use_vec_extr = TARGET_SSE4_1;
24542 /* ??? Could extract the appropriate HImode element and shift. */
24549 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24550 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24552 /* Let the rtl optimizers know about the zero extension performed. */
24553 if (inner_mode == QImode || inner_mode == HImode)
24555 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24556 target = gen_lowpart (SImode, target);
24559 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24563 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24565 emit_move_insn (mem, vec);
24567 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24568 emit_move_insn (target, tmp);
24572 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24573 pattern to reduce; DEST is the destination; IN is the input vector. */
24576 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24578 rtx tmp1, tmp2, tmp3;
24580 tmp1 = gen_reg_rtx (V4SFmode);
24581 tmp2 = gen_reg_rtx (V4SFmode);
24582 tmp3 = gen_reg_rtx (V4SFmode);
24584 emit_insn (gen_sse_movhlps (tmp1, in, in));
24585 emit_insn (fn (tmp2, tmp1, in));
24587 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24588 GEN_INT (1), GEN_INT (1),
24589 GEN_INT (1+4), GEN_INT (1+4)));
24590 emit_insn (fn (dest, tmp2, tmp3));
24593 /* Target hook for scalar_mode_supported_p. */
24595 ix86_scalar_mode_supported_p (enum machine_mode mode)
24597 if (DECIMAL_FLOAT_MODE_P (mode))
24599 else if (mode == TFmode)
24600 return TARGET_64BIT;
24602 return default_scalar_mode_supported_p (mode);
24605 /* Implements target hook vector_mode_supported_p. */
24607 ix86_vector_mode_supported_p (enum machine_mode mode)
24609 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24611 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24613 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24615 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24620 /* Target hook for c_mode_for_suffix. */
24621 static enum machine_mode
24622 ix86_c_mode_for_suffix (char suffix)
24624 if (TARGET_64BIT && suffix == 'q')
24626 if (TARGET_MMX && suffix == 'w')
24632 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24634 We do this in the new i386 backend to maintain source compatibility
24635 with the old cc0-based compiler. */
24638 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24639 tree inputs ATTRIBUTE_UNUSED,
24642 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24644 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24649 /* Implements target vector targetm.asm.encode_section_info. This
24650 is not used by netware. */
24652 static void ATTRIBUTE_UNUSED
24653 ix86_encode_section_info (tree decl, rtx rtl, int first)
24655 default_encode_section_info (decl, rtl, first);
24657 if (TREE_CODE (decl) == VAR_DECL
24658 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24659 && ix86_in_large_data_p (decl))
24660 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24663 /* Worker function for REVERSE_CONDITION. */
24666 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24668 return (mode != CCFPmode && mode != CCFPUmode
24669 ? reverse_condition (code)
24670 : reverse_condition_maybe_unordered (code));
24673 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24677 output_387_reg_move (rtx insn, rtx *operands)
24679 if (REG_P (operands[0]))
24681 if (REG_P (operands[1])
24682 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24684 if (REGNO (operands[0]) == FIRST_STACK_REG)
24685 return output_387_ffreep (operands, 0);
24686 return "fstp\t%y0";
24688 if (STACK_TOP_P (operands[0]))
24689 return "fld%z1\t%y1";
24692 else if (MEM_P (operands[0]))
24694 gcc_assert (REG_P (operands[1]));
24695 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24696 return "fstp%z0\t%y0";
24699 /* There is no non-popping store to memory for XFmode.
24700 So if we need one, follow the store with a load. */
24701 if (GET_MODE (operands[0]) == XFmode)
24702 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24704 return "fst%z0\t%y0";
24711 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24712 FP status register is set. */
24715 ix86_emit_fp_unordered_jump (rtx label)
24717 rtx reg = gen_reg_rtx (HImode);
24720 emit_insn (gen_x86_fnstsw_1 (reg));
24722 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24724 emit_insn (gen_x86_sahf_1 (reg));
24726 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24727 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24731 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24733 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24734 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24737 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24738 gen_rtx_LABEL_REF (VOIDmode, label),
24740 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24742 emit_jump_insn (temp);
24743 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24746 /* Output code to perform a log1p XFmode calculation. */
24748 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24750 rtx label1 = gen_label_rtx ();
24751 rtx label2 = gen_label_rtx ();
24753 rtx tmp = gen_reg_rtx (XFmode);
24754 rtx tmp2 = gen_reg_rtx (XFmode);
24756 emit_insn (gen_absxf2 (tmp, op1));
24757 emit_insn (gen_cmpxf (tmp,
24758 CONST_DOUBLE_FROM_REAL_VALUE (
24759 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24761 emit_jump_insn (gen_bge (label1));
24763 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24764 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24765 emit_jump (label2);
24767 emit_label (label1);
24768 emit_move_insn (tmp, CONST1_RTX (XFmode));
24769 emit_insn (gen_addxf3 (tmp, op1, tmp));
24770 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24771 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24773 emit_label (label2);
24776 /* Output code to perform a Newton-Rhapson approximation of a single precision
24777 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24779 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24781 rtx x0, x1, e0, e1, two;
24783 x0 = gen_reg_rtx (mode);
24784 e0 = gen_reg_rtx (mode);
24785 e1 = gen_reg_rtx (mode);
24786 x1 = gen_reg_rtx (mode);
24788 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24790 if (VECTOR_MODE_P (mode))
24791 two = ix86_build_const_vector (SFmode, true, two);
24793 two = force_reg (mode, two);
24795 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24797 /* x0 = rcp(b) estimate */
24798 emit_insn (gen_rtx_SET (VOIDmode, x0,
24799 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24802 emit_insn (gen_rtx_SET (VOIDmode, e0,
24803 gen_rtx_MULT (mode, x0, b)));
24805 emit_insn (gen_rtx_SET (VOIDmode, e1,
24806 gen_rtx_MINUS (mode, two, e0)));
24808 emit_insn (gen_rtx_SET (VOIDmode, x1,
24809 gen_rtx_MULT (mode, x0, e1)));
24811 emit_insn (gen_rtx_SET (VOIDmode, res,
24812 gen_rtx_MULT (mode, a, x1)));
24815 /* Output code to perform a Newton-Rhapson approximation of a
24816 single precision floating point [reciprocal] square root. */
24818 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24821 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24824 x0 = gen_reg_rtx (mode);
24825 e0 = gen_reg_rtx (mode);
24826 e1 = gen_reg_rtx (mode);
24827 e2 = gen_reg_rtx (mode);
24828 e3 = gen_reg_rtx (mode);
24830 real_from_integer (&r, VOIDmode, -3, -1, 0);
24831 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24833 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24834 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24836 if (VECTOR_MODE_P (mode))
24838 mthree = ix86_build_const_vector (SFmode, true, mthree);
24839 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24842 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24843 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24845 /* x0 = rsqrt(a) estimate */
24846 emit_insn (gen_rtx_SET (VOIDmode, x0,
24847 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24850 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24855 zero = gen_reg_rtx (mode);
24856 mask = gen_reg_rtx (mode);
24858 zero = force_reg (mode, CONST0_RTX(mode));
24859 emit_insn (gen_rtx_SET (VOIDmode, mask,
24860 gen_rtx_NE (mode, zero, a)));
24862 emit_insn (gen_rtx_SET (VOIDmode, x0,
24863 gen_rtx_AND (mode, x0, mask)));
24867 emit_insn (gen_rtx_SET (VOIDmode, e0,
24868 gen_rtx_MULT (mode, x0, a)));
24870 emit_insn (gen_rtx_SET (VOIDmode, e1,
24871 gen_rtx_MULT (mode, e0, x0)));
24874 mthree = force_reg (mode, mthree);
24875 emit_insn (gen_rtx_SET (VOIDmode, e2,
24876 gen_rtx_PLUS (mode, e1, mthree)));
24878 mhalf = force_reg (mode, mhalf);
24880 /* e3 = -.5 * x0 */
24881 emit_insn (gen_rtx_SET (VOIDmode, e3,
24882 gen_rtx_MULT (mode, x0, mhalf)));
24884 /* e3 = -.5 * e0 */
24885 emit_insn (gen_rtx_SET (VOIDmode, e3,
24886 gen_rtx_MULT (mode, e0, mhalf)));
24887 /* ret = e2 * e3 */
24888 emit_insn (gen_rtx_SET (VOIDmode, res,
24889 gen_rtx_MULT (mode, e2, e3)));
24892 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24894 static void ATTRIBUTE_UNUSED
24895 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24898 /* With Binutils 2.15, the "@unwind" marker must be specified on
24899 every occurrence of the ".eh_frame" section, not just the first
24902 && strcmp (name, ".eh_frame") == 0)
24904 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24905 flags & SECTION_WRITE ? "aw" : "a");
24908 default_elf_asm_named_section (name, flags, decl);
24911 /* Return the mangling of TYPE if it is an extended fundamental type. */
24913 static const char *
24914 ix86_mangle_type (const_tree type)
24916 type = TYPE_MAIN_VARIANT (type);
24918 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24919 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24922 switch (TYPE_MODE (type))
24925 /* __float128 is "g". */
24928 /* "long double" or __float80 is "e". */
24935 /* For 32-bit code we can save PIC register setup by using
24936 __stack_chk_fail_local hidden function instead of calling
24937 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24938 register, so it is better to call __stack_chk_fail directly. */
24941 ix86_stack_protect_fail (void)
24943 return TARGET_64BIT
24944 ? default_external_stack_protect_fail ()
24945 : default_hidden_stack_protect_fail ();
24948 /* Select a format to encode pointers in exception handling data. CODE
24949 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24950 true if the symbol may be affected by dynamic relocations.
24952 ??? All x86 object file formats are capable of representing this.
24953 After all, the relocation needed is the same as for the call insn.
24954 Whether or not a particular assembler allows us to enter such, I
24955 guess we'll have to see. */
24957 asm_preferred_eh_data_format (int code, int global)
24961 int type = DW_EH_PE_sdata8;
24963 || ix86_cmodel == CM_SMALL_PIC
24964 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24965 type = DW_EH_PE_sdata4;
24966 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24968 if (ix86_cmodel == CM_SMALL
24969 || (ix86_cmodel == CM_MEDIUM && code))
24970 return DW_EH_PE_udata4;
24971 return DW_EH_PE_absptr;
24974 /* Expand copysign from SIGN to the positive value ABS_VALUE
24975 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24978 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24980 enum machine_mode mode = GET_MODE (sign);
24981 rtx sgn = gen_reg_rtx (mode);
24982 if (mask == NULL_RTX)
24984 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24985 if (!VECTOR_MODE_P (mode))
24987 /* We need to generate a scalar mode mask in this case. */
24988 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24989 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24990 mask = gen_reg_rtx (mode);
24991 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24995 mask = gen_rtx_NOT (mode, mask);
24996 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24997 gen_rtx_AND (mode, mask, sign)));
24998 emit_insn (gen_rtx_SET (VOIDmode, result,
24999 gen_rtx_IOR (mode, abs_value, sgn)));
25002 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25003 mask for masking out the sign-bit is stored in *SMASK, if that is
25006 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25008 enum machine_mode mode = GET_MODE (op0);
25011 xa = gen_reg_rtx (mode);
25012 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25013 if (!VECTOR_MODE_P (mode))
25015 /* We need to generate a scalar mode mask in this case. */
25016 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25017 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25018 mask = gen_reg_rtx (mode);
25019 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25021 emit_insn (gen_rtx_SET (VOIDmode, xa,
25022 gen_rtx_AND (mode, op0, mask)));
25030 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25031 swapping the operands if SWAP_OPERANDS is true. The expanded
25032 code is a forward jump to a newly created label in case the
25033 comparison is true. The generated label rtx is returned. */
25035 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25036 bool swap_operands)
25047 label = gen_label_rtx ();
25048 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25049 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25050 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25051 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25052 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25053 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25054 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25055 JUMP_LABEL (tmp) = label;
25060 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25061 using comparison code CODE. Operands are swapped for the comparison if
25062 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25064 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25065 bool swap_operands)
25067 enum machine_mode mode = GET_MODE (op0);
25068 rtx mask = gen_reg_rtx (mode);
25077 if (mode == DFmode)
25078 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25079 gen_rtx_fmt_ee (code, mode, op0, op1)));
25081 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25082 gen_rtx_fmt_ee (code, mode, op0, op1)));
25087 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25088 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
25090 ix86_gen_TWO52 (enum machine_mode mode)
25092 REAL_VALUE_TYPE TWO52r;
25095 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25096 TWO52 = const_double_from_real_value (TWO52r, mode);
25097 TWO52 = force_reg (mode, TWO52);
25102 /* Expand SSE sequence for computing lround from OP1 storing
25105 ix86_expand_lround (rtx op0, rtx op1)
25107 /* C code for the stuff we're doing below:
25108 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25111 enum machine_mode mode = GET_MODE (op1);
25112 const struct real_format *fmt;
25113 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25116 /* load nextafter (0.5, 0.0) */
25117 fmt = REAL_MODE_FORMAT (mode);
25118 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25119 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25121 /* adj = copysign (0.5, op1) */
25122 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25123 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25125 /* adj = op1 + adj */
25126 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25128 /* op0 = (imode)adj */
25129 expand_fix (op0, adj, 0);
25132 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
25135 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25137 /* C code for the stuff we're doing below (for do_floor):
25139 xi -= (double)xi > op1 ? 1 : 0;
25142 enum machine_mode fmode = GET_MODE (op1);
25143 enum machine_mode imode = GET_MODE (op0);
25144 rtx ireg, freg, label, tmp;
25146 /* reg = (long)op1 */
25147 ireg = gen_reg_rtx (imode);
25148 expand_fix (ireg, op1, 0);
25150 /* freg = (double)reg */
25151 freg = gen_reg_rtx (fmode);
25152 expand_float (freg, ireg, 0);
25154 /* ireg = (freg > op1) ? ireg - 1 : ireg */
25155 label = ix86_expand_sse_compare_and_jump (UNLE,
25156 freg, op1, !do_floor);
25157 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25158 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25159 emit_move_insn (ireg, tmp);
25161 emit_label (label);
25162 LABEL_NUSES (label) = 1;
25164 emit_move_insn (op0, ireg);
25167 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25168 result in OPERAND0. */
25170 ix86_expand_rint (rtx operand0, rtx operand1)
25172 /* C code for the stuff we're doing below:
25173 xa = fabs (operand1);
25174 if (!isless (xa, 2**52))
25176 xa = xa + 2**52 - 2**52;
25177 return copysign (xa, operand1);
25179 enum machine_mode mode = GET_MODE (operand0);
25180 rtx res, xa, label, TWO52, mask;
25182 res = gen_reg_rtx (mode);
25183 emit_move_insn (res, operand1);
25185 /* xa = abs (operand1) */
25186 xa = ix86_expand_sse_fabs (res, &mask);
25188 /* if (!isless (xa, TWO52)) goto label; */
25189 TWO52 = ix86_gen_TWO52 (mode);
25190 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25192 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25193 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25195 ix86_sse_copysign_to_positive (res, xa, res, mask);
25197 emit_label (label);
25198 LABEL_NUSES (label) = 1;
25200 emit_move_insn (operand0, res);
25203 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25206 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25208 /* C code for the stuff we expand below.
25209 double xa = fabs (x), x2;
25210 if (!isless (xa, TWO52))
25212 xa = xa + TWO52 - TWO52;
25213 x2 = copysign (xa, x);
25222 enum machine_mode mode = GET_MODE (operand0);
25223 rtx xa, TWO52, tmp, label, one, res, mask;
25225 TWO52 = ix86_gen_TWO52 (mode);
25227 /* Temporary for holding the result, initialized to the input
25228 operand to ease control flow. */
25229 res = gen_reg_rtx (mode);
25230 emit_move_insn (res, operand1);
25232 /* xa = abs (operand1) */
25233 xa = ix86_expand_sse_fabs (res, &mask);
25235 /* if (!isless (xa, TWO52)) goto label; */
25236 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25238 /* xa = xa + TWO52 - TWO52; */
25239 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25240 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25242 /* xa = copysign (xa, operand1) */
25243 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25245 /* generate 1.0 or -1.0 */
25246 one = force_reg (mode,
25247 const_double_from_real_value (do_floor
25248 ? dconst1 : dconstm1, mode));
25250 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25251 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25252 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25253 gen_rtx_AND (mode, one, tmp)));
25254 /* We always need to subtract here to preserve signed zero. */
25255 tmp = expand_simple_binop (mode, MINUS,
25256 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25257 emit_move_insn (res, tmp);
25259 emit_label (label);
25260 LABEL_NUSES (label) = 1;
25262 emit_move_insn (operand0, res);
25265 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
25268 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25270 /* C code for the stuff we expand below.
25271 double xa = fabs (x), x2;
25272 if (!isless (xa, TWO52))
25274 x2 = (double)(long)x;
25281 if (HONOR_SIGNED_ZEROS (mode))
25282 return copysign (x2, x);
25285 enum machine_mode mode = GET_MODE (operand0);
25286 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25288 TWO52 = ix86_gen_TWO52 (mode);
25290 /* Temporary for holding the result, initialized to the input
25291 operand to ease control flow. */
25292 res = gen_reg_rtx (mode);
25293 emit_move_insn (res, operand1);
25295 /* xa = abs (operand1) */
25296 xa = ix86_expand_sse_fabs (res, &mask);
25298 /* if (!isless (xa, TWO52)) goto label; */
25299 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25301 /* xa = (double)(long)x */
25302 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25303 expand_fix (xi, res, 0);
25304 expand_float (xa, xi, 0);
25307 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25309 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25310 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25311 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25312 gen_rtx_AND (mode, one, tmp)));
25313 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25314 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25315 emit_move_insn (res, tmp);
25317 if (HONOR_SIGNED_ZEROS (mode))
25318 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25320 emit_label (label);
25321 LABEL_NUSES (label) = 1;
25323 emit_move_insn (operand0, res);
25326 /* Expand SSE sequence for computing round from OPERAND1 storing
25327 into OPERAND0. Sequence that works without relying on DImode truncation
25328 via cvttsd2siq that is only available on 64bit targets. */
25330 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25332 /* C code for the stuff we expand below.
25333 double xa = fabs (x), xa2, x2;
25334 if (!isless (xa, TWO52))
25336 Using the absolute value and copying back sign makes
25337 -0.0 -> -0.0 correct.
25338 xa2 = xa + TWO52 - TWO52;
25343 else if (dxa > 0.5)
25345 x2 = copysign (xa2, x);
25348 enum machine_mode mode = GET_MODE (operand0);
25349 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25351 TWO52 = ix86_gen_TWO52 (mode);
25353 /* Temporary for holding the result, initialized to the input
25354 operand to ease control flow. */
25355 res = gen_reg_rtx (mode);
25356 emit_move_insn (res, operand1);
25358 /* xa = abs (operand1) */
25359 xa = ix86_expand_sse_fabs (res, &mask);
25361 /* if (!isless (xa, TWO52)) goto label; */
25362 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25364 /* xa2 = xa + TWO52 - TWO52; */
25365 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25366 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25368 /* dxa = xa2 - xa; */
25369 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25371 /* generate 0.5, 1.0 and -0.5 */
25372 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25373 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25374 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25378 tmp = gen_reg_rtx (mode);
25379 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25380 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25381 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25382 gen_rtx_AND (mode, one, tmp)));
25383 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25384 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25385 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25386 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25387 gen_rtx_AND (mode, one, tmp)));
25388 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25390 /* res = copysign (xa2, operand1) */
25391 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25393 emit_label (label);
25394 LABEL_NUSES (label) = 1;
25396 emit_move_insn (operand0, res);
25399 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25402 ix86_expand_trunc (rtx operand0, rtx operand1)
25404 /* C code for SSE variant we expand below.
25405 double xa = fabs (x), x2;
25406 if (!isless (xa, TWO52))
25408 x2 = (double)(long)x;
25409 if (HONOR_SIGNED_ZEROS (mode))
25410 return copysign (x2, x);
25413 enum machine_mode mode = GET_MODE (operand0);
25414 rtx xa, xi, TWO52, label, res, mask;
25416 TWO52 = ix86_gen_TWO52 (mode);
25418 /* Temporary for holding the result, initialized to the input
25419 operand to ease control flow. */
25420 res = gen_reg_rtx (mode);
25421 emit_move_insn (res, operand1);
25423 /* xa = abs (operand1) */
25424 xa = ix86_expand_sse_fabs (res, &mask);
25426 /* if (!isless (xa, TWO52)) goto label; */
25427 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25429 /* x = (double)(long)x */
25430 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25431 expand_fix (xi, res, 0);
25432 expand_float (res, xi, 0);
25434 if (HONOR_SIGNED_ZEROS (mode))
25435 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25437 emit_label (label);
25438 LABEL_NUSES (label) = 1;
25440 emit_move_insn (operand0, res);
25443 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25446 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25448 enum machine_mode mode = GET_MODE (operand0);
25449 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25451 /* C code for SSE variant we expand below.
25452 double xa = fabs (x), x2;
25453 if (!isless (xa, TWO52))
25455 xa2 = xa + TWO52 - TWO52;
25459 x2 = copysign (xa2, x);
25463 TWO52 = ix86_gen_TWO52 (mode);
25465 /* Temporary for holding the result, initialized to the input
25466 operand to ease control flow. */
25467 res = gen_reg_rtx (mode);
25468 emit_move_insn (res, operand1);
25470 /* xa = abs (operand1) */
25471 xa = ix86_expand_sse_fabs (res, &smask);
25473 /* if (!isless (xa, TWO52)) goto label; */
25474 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25476 /* res = xa + TWO52 - TWO52; */
25477 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25478 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25479 emit_move_insn (res, tmp);
25482 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25484 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25485 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25486 emit_insn (gen_rtx_SET (VOIDmode, mask,
25487 gen_rtx_AND (mode, mask, one)));
25488 tmp = expand_simple_binop (mode, MINUS,
25489 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25490 emit_move_insn (res, tmp);
25492 /* res = copysign (res, operand1) */
25493 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25495 emit_label (label);
25496 LABEL_NUSES (label) = 1;
25498 emit_move_insn (operand0, res);
25501 /* Expand SSE sequence for computing round from OPERAND1 storing
25504 ix86_expand_round (rtx operand0, rtx operand1)
25506 /* C code for the stuff we're doing below:
25507 double xa = fabs (x);
25508 if (!isless (xa, TWO52))
25510 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25511 return copysign (xa, x);
25513 enum machine_mode mode = GET_MODE (operand0);
25514 rtx res, TWO52, xa, label, xi, half, mask;
25515 const struct real_format *fmt;
25516 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25518 /* Temporary for holding the result, initialized to the input
25519 operand to ease control flow. */
25520 res = gen_reg_rtx (mode);
25521 emit_move_insn (res, operand1);
25523 TWO52 = ix86_gen_TWO52 (mode);
25524 xa = ix86_expand_sse_fabs (res, &mask);
25525 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25527 /* load nextafter (0.5, 0.0) */
25528 fmt = REAL_MODE_FORMAT (mode);
25529 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25530 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25532 /* xa = xa + 0.5 */
25533 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25534 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25536 /* xa = (double)(int64_t)xa */
25537 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25538 expand_fix (xi, xa, 0);
25539 expand_float (xa, xi, 0);
25541 /* res = copysign (xa, operand1) */
25542 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25544 emit_label (label);
25545 LABEL_NUSES (label) = 1;
25547 emit_move_insn (operand0, res);
25551 /* Validate whether a SSE5 instruction is valid or not.
25552 OPERANDS is the array of operands.
25553 NUM is the number of operands.
25554 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25555 NUM_MEMORY is the maximum number of memory operands to accept. */
25558 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25559 bool uses_oc0, int num_memory)
25565 /* Count the number of memory arguments */
25568 for (i = 0; i < num; i++)
25570 enum machine_mode mode = GET_MODE (operands[i]);
25571 if (register_operand (operands[i], mode))
25574 else if (memory_operand (operands[i], mode))
25576 mem_mask |= (1 << i);
25582 rtx pattern = PATTERN (insn);
25584 /* allow 0 for pcmov */
25585 if (GET_CODE (pattern) != SET
25586 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25588 || operands[i] != CONST0_RTX (mode))
25593 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25594 a memory operation. */
25595 if (num_memory < 0)
25597 num_memory = -num_memory;
25598 if ((mem_mask & (1 << (num-1))) != 0)
25600 mem_mask &= ~(1 << (num-1));
25605 /* If there were no memory operations, allow the insn */
25609 /* Do not allow the destination register to be a memory operand. */
25610 else if (mem_mask & (1 << 0))
25613 /* If there are too many memory operations, disallow the instruction. While
25614 the hardware only allows 1 memory reference, before register allocation
25615 for some insns, we allow two memory operations sometimes in order to allow
25616 code like the following to be optimized:
25618 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25620 or similar cases that are vectorized into using the fmaddss
25622 else if (mem_count > num_memory)
25625 /* Don't allow more than one memory operation if not optimizing. */
25626 else if (mem_count > 1 && !optimize)
25629 else if (num == 4 && mem_count == 1)
25631 /* formats (destination is the first argument), example fmaddss:
25632 xmm1, xmm1, xmm2, xmm3/mem
25633 xmm1, xmm1, xmm2/mem, xmm3
25634 xmm1, xmm2, xmm3/mem, xmm1
25635 xmm1, xmm2/mem, xmm3, xmm1 */
25637 return ((mem_mask == (1 << 1))
25638 || (mem_mask == (1 << 2))
25639 || (mem_mask == (1 << 3)));
25641 /* format, example pmacsdd:
25642 xmm1, xmm2, xmm3/mem, xmm1 */
25644 return (mem_mask == (1 << 2));
25647 else if (num == 4 && num_memory == 2)
25649 /* If there are two memory operations, we can load one of the memory ops
25650 into the destination register. This is for optimizing the
25651 multiply/add ops, which the combiner has optimized both the multiply
25652 and the add insns to have a memory operation. We have to be careful
25653 that the destination doesn't overlap with the inputs. */
25654 rtx op0 = operands[0];
25656 if (reg_mentioned_p (op0, operands[1])
25657 || reg_mentioned_p (op0, operands[2])
25658 || reg_mentioned_p (op0, operands[3]))
25661 /* formats (destination is the first argument), example fmaddss:
25662 xmm1, xmm1, xmm2, xmm3/mem
25663 xmm1, xmm1, xmm2/mem, xmm3
25664 xmm1, xmm2, xmm3/mem, xmm1
25665 xmm1, xmm2/mem, xmm3, xmm1
25667 For the oc0 case, we will load either operands[1] or operands[3] into
25668 operands[0], so any combination of 2 memory operands is ok. */
25672 /* format, example pmacsdd:
25673 xmm1, xmm2, xmm3/mem, xmm1
25675 For the integer multiply/add instructions be more restrictive and
25676 require operands[2] and operands[3] to be the memory operands. */
25678 return (mem_mask == ((1 << 2) | (1 << 3)));
25681 else if (num == 3 && num_memory == 1)
25683 /* formats, example protb:
25684 xmm1, xmm2, xmm3/mem
25685 xmm1, xmm2/mem, xmm3 */
25687 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25689 /* format, example comeq:
25690 xmm1, xmm2, xmm3/mem */
25692 return (mem_mask == (1 << 2));
25696 gcc_unreachable ();
25702 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25703 hardware will allow by using the destination register to load one of the
25704 memory operations. Presently this is used by the multiply/add routines to
25705 allow 2 memory references. */
25708 ix86_expand_sse5_multiple_memory (rtx operands[],
25710 enum machine_mode mode)
25712 rtx op0 = operands[0];
25714 || memory_operand (op0, mode)
25715 || reg_mentioned_p (op0, operands[1])
25716 || reg_mentioned_p (op0, operands[2])
25717 || reg_mentioned_p (op0, operands[3]))
25718 gcc_unreachable ();
25720 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25721 the destination register. */
25722 if (memory_operand (operands[1], mode))
25724 emit_move_insn (op0, operands[1]);
25727 else if (memory_operand (operands[3], mode))
25729 emit_move_insn (op0, operands[3]);
25733 gcc_unreachable ();
25739 /* Table of valid machine attributes. */
25740 static const struct attribute_spec ix86_attribute_table[] =
25742 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25743 /* Stdcall attribute says callee is responsible for popping arguments
25744 if they are not variable. */
25745 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25746 /* Fastcall attribute says callee is responsible for popping arguments
25747 if they are not variable. */
25748 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25749 /* Cdecl attribute says the callee is a normal C declaration */
25750 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25751 /* Regparm attribute specifies how many integer arguments are to be
25752 passed in registers. */
25753 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25754 /* Sseregparm attribute says we are using x86_64 calling conventions
25755 for FP arguments. */
25756 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25757 /* force_align_arg_pointer says this function realigns the stack at entry. */
25758 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25759 false, true, true, ix86_handle_cconv_attribute },
25760 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25761 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25762 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25763 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25765 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25766 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25767 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25768 SUBTARGET_ATTRIBUTE_TABLE,
25770 { NULL, 0, 0, false, false, false, NULL }
25773 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25775 x86_builtin_vectorization_cost (bool runtime_test)
25777 /* If the branch of the runtime test is taken - i.e. - the vectorized
25778 version is skipped - this incurs a misprediction cost (because the
25779 vectorized version is expected to be the fall-through). So we subtract
25780 the latency of a mispredicted branch from the costs that are incured
25781 when the vectorized version is executed.
25783 TODO: The values in individual target tables have to be tuned or new
25784 fields may be needed. For eg. on K8, the default branch path is the
25785 not-taken path. If the taken path is predicted correctly, the minimum
25786 penalty of going down the taken-path is 1 cycle. If the taken-path is
25787 not predicted correctly, then the minimum penalty is 10 cycles. */
25791 return (-(ix86_cost->cond_taken_branch_cost));
25797 /* Initialize the GCC target structure. */
25798 #undef TARGET_ATTRIBUTE_TABLE
25799 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25800 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25801 # undef TARGET_MERGE_DECL_ATTRIBUTES
25802 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25805 #undef TARGET_COMP_TYPE_ATTRIBUTES
25806 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25808 #undef TARGET_INIT_BUILTINS
25809 #define TARGET_INIT_BUILTINS ix86_init_builtins
25810 #undef TARGET_EXPAND_BUILTIN
25811 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25813 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25814 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25815 ix86_builtin_vectorized_function
25817 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25818 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25820 #undef TARGET_BUILTIN_RECIPROCAL
25821 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25823 #undef TARGET_ASM_FUNCTION_EPILOGUE
25824 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25826 #undef TARGET_ENCODE_SECTION_INFO
25827 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25828 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25830 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25833 #undef TARGET_ASM_OPEN_PAREN
25834 #define TARGET_ASM_OPEN_PAREN ""
25835 #undef TARGET_ASM_CLOSE_PAREN
25836 #define TARGET_ASM_CLOSE_PAREN ""
25838 #undef TARGET_ASM_ALIGNED_HI_OP
25839 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25840 #undef TARGET_ASM_ALIGNED_SI_OP
25841 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25843 #undef TARGET_ASM_ALIGNED_DI_OP
25844 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25847 #undef TARGET_ASM_UNALIGNED_HI_OP
25848 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25849 #undef TARGET_ASM_UNALIGNED_SI_OP
25850 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25851 #undef TARGET_ASM_UNALIGNED_DI_OP
25852 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25854 #undef TARGET_SCHED_ADJUST_COST
25855 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25856 #undef TARGET_SCHED_ISSUE_RATE
25857 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25858 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25859 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25860 ia32_multipass_dfa_lookahead
25862 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25863 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25866 #undef TARGET_HAVE_TLS
25867 #define TARGET_HAVE_TLS true
25869 #undef TARGET_CANNOT_FORCE_CONST_MEM
25870 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25871 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25872 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25874 #undef TARGET_DELEGITIMIZE_ADDRESS
25875 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25877 #undef TARGET_MS_BITFIELD_LAYOUT_P
25878 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25881 #undef TARGET_BINDS_LOCAL_P
25882 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25884 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25885 #undef TARGET_BINDS_LOCAL_P
25886 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25889 #undef TARGET_ASM_OUTPUT_MI_THUNK
25890 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25891 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25892 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25894 #undef TARGET_ASM_FILE_START
25895 #define TARGET_ASM_FILE_START x86_file_start
25897 #undef TARGET_DEFAULT_TARGET_FLAGS
25898 #define TARGET_DEFAULT_TARGET_FLAGS \
25900 | TARGET_SUBTARGET_DEFAULT \
25901 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25903 #undef TARGET_HANDLE_OPTION
25904 #define TARGET_HANDLE_OPTION ix86_handle_option
25906 #undef TARGET_RTX_COSTS
25907 #define TARGET_RTX_COSTS ix86_rtx_costs
25908 #undef TARGET_ADDRESS_COST
25909 #define TARGET_ADDRESS_COST ix86_address_cost
25911 #undef TARGET_FIXED_CONDITION_CODE_REGS
25912 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25913 #undef TARGET_CC_MODES_COMPATIBLE
25914 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25916 #undef TARGET_MACHINE_DEPENDENT_REORG
25917 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25919 #undef TARGET_BUILD_BUILTIN_VA_LIST
25920 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25922 #undef TARGET_EXPAND_BUILTIN_VA_START
25923 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25925 #undef TARGET_MD_ASM_CLOBBERS
25926 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25928 #undef TARGET_PROMOTE_PROTOTYPES
25929 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25930 #undef TARGET_STRUCT_VALUE_RTX
25931 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25932 #undef TARGET_SETUP_INCOMING_VARARGS
25933 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25934 #undef TARGET_MUST_PASS_IN_STACK
25935 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25936 #undef TARGET_PASS_BY_REFERENCE
25937 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25938 #undef TARGET_INTERNAL_ARG_POINTER
25939 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25940 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25941 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25942 #undef TARGET_STRICT_ARGUMENT_NAMING
25943 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25945 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25946 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25948 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25949 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25951 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25952 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25954 #undef TARGET_C_MODE_FOR_SUFFIX
25955 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25958 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25959 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25962 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25963 #undef TARGET_INSERT_ATTRIBUTES
25964 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25967 #undef TARGET_MANGLE_TYPE
25968 #define TARGET_MANGLE_TYPE ix86_mangle_type
25970 #undef TARGET_STACK_PROTECT_FAIL
25971 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25973 #undef TARGET_FUNCTION_VALUE
25974 #define TARGET_FUNCTION_VALUE ix86_function_value
25976 #undef TARGET_SECONDARY_RELOAD
25977 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
25979 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25980 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25982 struct gcc_target targetm = TARGET_INITIALIZER;
25984 #include "gt-i386.h"